From 19eafab0ea37da64d0b4869786033ef25e5e73cc Mon Sep 17 00:00:00 2001 From: Till Englert Date: Fri, 24 Mar 2023 11:00:49 +0100 Subject: [PATCH 001/230] Initial commit for draft PR --- assets/report_styles.css | 29 ++ assets/report_template.Rmd | 311 ++++++++++++++++++++++ bin/generate_report.R | 70 +++++ conf/modules.config | 8 + modules/local/summary_report.nf | 63 +++++ subworkflows/local/dada2_preprocessing.nf | 9 +- workflows/ampliseq.nf | 22 ++ 7 files changed, 509 insertions(+), 3 deletions(-) create mode 100644 assets/report_styles.css create mode 100644 assets/report_template.Rmd create mode 100755 bin/generate_report.R create mode 100644 modules/local/summary_report.nf diff --git a/assets/report_styles.css b/assets/report_styles.css new file mode 100644 index 00000000..4e1988dc --- /dev/null +++ b/assets/report_styles.css @@ -0,0 +1,29 @@ +body { + font-family: Calibri, helvetica, sans-serif; + background: none transparent; +} + +h1 { + color: rgb(3, 101, 192); + font-size: 127%; +} + +.title { + margin-right: 200px; +} + +h2 { + color: rgb(3, 101, 192); + font-size: 121%; +} + +h3 { + font-size: 109%; + font-weight: bold; +} + +h4 { + font-size: 100%; + font-weight: bold; + font-style: italic; +} diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd new file mode 100644 index 00000000..c3b50c0b --- /dev/null +++ b/assets/report_template.Rmd @@ -0,0 +1,311 @@ +--- +title: Summary of nf-core/ampliseq results +output: + html_document: + toc: true # table of contents + toc_float: true # float the table of contents to the left of the main document content + toc_depth: 3 # header levels 1,2,3 + theme: default + number_sections: true # add section numbering to headers + df_print: paged # tables are printed as an html table with support for pagination over rows and columns + css: ./report_styles.css + highlight: pygments + pdf_document: true +#bibliography: ./references.bibtex +params: + flag_skip_fastqc: FALSE + flag_skip_cutadapt: FALSE + 
flag_skip_dada2: FALSE + flag_skip_dada2_: TRUE + flag_skip_barrnap: FALSE + flag_retain_untrimmed: TRUE + flag_trunclenf: "" + flag_trunclenr: "" + flag_trunc_qmin: "" + + mqc_plot: "" + ca_sum_path: "" + dada_filtntrim_args: "" + dada_qc_f_path: "" + dada_qc_r_path: "" + dada_pp_qc_f_path: "" + dada_pp_qc_r_path: "" + dada_1_err_path: "" + dada_2_err_path: "" + asv_table_path: "" + path_asv_fa: "" + path_dada2_tab: "" + dada_stats_path: "" + path_rrna_arc: "" + path_rrna_bac: "" + path_rrna_euk: "" + path_rrna_mito: "" + ref_tax_path: "" + asv_tax_path: "" +--- + +```{r setup, include=FALSE} +library("dplyr") +library("ggplot2") +library("knitr") +library("DT") +library("formattable") +library("purrr") +knitr::opts_chunk$set(echo = FALSE) +``` + +# Preprocessing + +```{r, eval = !params$flag_skip_fastqc, results='asis'} +mqc_rep_path <- paste0("../multiqc/multiqc_report.html") + +cat("## FastQC\n") +cat("The sequence quality was checked using FastQC and resulting data was ", + "aggregated using the FastQC module of MultiQC. For more quality ", + "controls and per sample quality checks you can check the full ", + "MultiQC report, which is found [here](", mqc_rep_path, ").", sep = "") +``` + +```{r, eval = !params$flag_skip_fastqc, out.width='100%', dpi=1200, fig.align='center'} +knitr::include_graphics(params$mqc_plot) +``` + +```{r, eval = !params$flag_skip_cutadapt, results='asis'} +# import tsv +cutadapt_summary <- read.table(file = params$ca_sum_path, header = TRUE, sep = "\t") + +passed_col <- as.numeric(substr( + cutadapt_summary$cutadapt_passing_filters_percent, 1, 4)) + +max_disc <- 100 - min(passed_col) +avg_passed <- mean(passed_col) + +cutadapt_text_unch <- paste0("## Cutadapt\n", + "Remaining primers were trimmed using cutadapt") +cutadapt_text_ch <- paste0("and all untrimmed sequences were discarded. 
<", + max_disc, "% of the sequences were discarded per sample and a mean of ", + avg_passed, "% of the sequences per sample passed the filtering.") + +if (!params$flag_retain_untrimmed) cutadapt_text <- paste0( + cutadapt_text_unch, cutadapt_text_ch + ) else cutadapt_text <- paste0(cutadapt_text_unch, ".") + +cat(cutadapt_text) + +datatable(cutadapt_summary, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) + +cutadapt_summary$passed_num <- passed_col + +ggplot(cutadapt_summary, + aes(x = sample, y = passed_col)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("% passing filters of cutadapt") + + xlab("Samples") + + coord_flip() + + theme_bw() +``` + +```{r, eval = !params$flag_skip_dada2, results='asis'} +dada2_dir <- paste0("../dada2/QC/") +cat("## QC using DADA2\n") +cat("Further quality checks were performed using the DADA2 package and ", + "forward and reverse quality stats are displayed as well as the ", + "respective preprocessed quality stats. 
The original plots can be", + "found [here](", dada2_dir, ").", sep = "") +``` + +```{r, eval = !params$flag_skip_dada2, results='asis'} +if (params$flag_trunc_qmin != -1) { + f_and_tr_args <- readLines(params$dada_filtntrim_args) + trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", + f_and_tr_args), ", ") + tr_len_f <- trunc_len[[1]][1] + tr_len_r <- trunc_len[[1]][2] + no_trunclen <- cat("Reads were trimmed before median quality drops ", + "below ", params$flag_trunc_qmin, " resulting in a trim of ", + "forward reads at ", tr_len_f, " bp and reverse ", + "reads at ", tr_len_r, " bp.", sep = "") +} else { + trunclen <- cat("Forward reads were trimmed at ", params$flag_trunclenf, + " bp and reverse reads were trimmed at ", params$flag_trunclenr, + " bp.", sep = "") +} +``` + +```{r, eval = !params$flag_skip_dada2, results='asis', out.width="49%", fig.show='hold',fig.align='center'} +# TODO FW or RV may not exist +# TODO svg seems to have an error +knitr::include_graphics(c(params$dada_qc_f_path,params$dada_qc_r_path)) +``` + +```{r, eval = !params$flag_skip_dada2, results='asis', out.width="49%", fig.show='hold',fig.align='center'} +# TODO FW or RV may not exist +# TODO same issue, also error in svg +knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) +``` + +```{r, eval = !params$flag_skip_dada2, results='asis'} +cat("## Error correction using DADA2\n") +cat("Error correction was performed using DADA2 as well and the original ", + "plots can be found [here](../dada2/QC/).", sep = "") +``` + +```{r, eval = !params$flag_skip_dada2, results = 'asis', out.width="49%", fig.show='hold',fig.align='center'} +# TODO Error profiles per run (Name may change default 1) +# TODO paried end produces *_2* otherwise only _1 + +knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) +``` + +```{r, eval = !params$flag_skip_dada2_, results='asis'} +# Header +cat("## ASV inference using DADA2\n") + +# import asv tsv 
+asv_table_path <- paste0(params$path_pl_results, "/dada2/ASV_table.tsv") +asv_table <- read.table(file = asv_table_path, header = TRUE, sep = "\t") +n_asv <- length(asv_table$ASV_ID) + +# Define additional table paths +path_asv_fa <- paste0(params$path_pl_results, "/dada2/ASV_seqs.fasta") +path_dada2_tab <- paste0(params$path_pl_results, "/dada2/DADA2_table.tsv") + +# Output text +cat(n_asv, + "amplicon sequence variants (ASVs) were obtained across all samples. ") +cat("The ASVs can be found [here](", path_asv_fa, "). And the corresponding", + " quantification of the ASVs across samples can be found ", + "[here](", path_asv_path, "). An extensive table containing both can ", + "be found [here](", path_dada2_tab, ").", sep = "") +``` + +```{r, eval = !params$flag_skip_dada2_, results='asis'} +# import stats tsv +dada_stats_path <- paste0(params$path_pl_results, "/dada2/DADA2_stats.tsv") +dada_stats <- read.table(file = dada_stats_path, header = TRUE, sep = "\t") + +# Display table +datatable(dada_stats, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) + +# Stacked barchart to num of reads + +# Calc exluded asvs and transform all cols to percent +dada_stats_ex <- data.frame(sample = dada_stats$sample, + DADA2_input = dada_stats$DADA2_input, + filtered = dada_stats$DADA2_input-dada_stats$filtered, + denoisedF = dada_stats$filtered-dada_stats$denoisedF, + denoisedR = dada_stats$denoisedF-dada_stats$denoisedR, + merged = dada_stats$denoisedR-dada_stats$merged, + nonchim = dada_stats$merged-dada_stats$nonchim, + analysis = dada_stats$nonchim) + +dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input, 2)) + +# Stack columns for both stacked barcharts +n_samples <- length(dada_stats_p$sample) +samples_t <- c(rep(dada_stats_p$sample, 6)) +steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoisedF", n_samples), + rep("excluded by denoisedR", n_samples), rep("excluded by 
merged", n_samples), + rep("excluded by nonchim", n_samples), rep("ready for analysis", n_samples)) + +# stack the column for absolute number of asvs +asvs_abs_t <- as.array(flatten_dbl(dada_stats_ex[3:8])) +dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) + +# Plot +dada_stats_ex_t$steps_t <- factor(dada_stats_ex_t$steps_t, levels=unique(dada_stats_ex_t$steps_t)) +ggplot(dada_stats_ex_t, aes(fill = steps_t, y = asvs_abs_t, x = samples_t)) + + geom_bar(position = "stack", stat = "identity") + + xlab("Samples") + + ylab("Absolute number ASVs") + + coord_flip() + + scale_fill_brewer("Filtering Steps", palette = "Spectral") + +# stack the column for percentage of asvs +asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:8])) +dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) + +# Plot +dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stats_p_t$steps_t)) +ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + + geom_bar(position = "fill", stat = "identity") + + xlab("Samples") + + ylab("% of total ASVs") + + coord_flip() + + scale_fill_brewer("Filtering Steps", palette = "Spectral") +``` + + +# Filtering of ASVs + + + +# Taxonomic Classification + +```{r, eval = !params$flag_skip_dada2_, results='asis'} +# Header +cat("## Taxonomic Classification using DADA2\n") + +ref_tax_path <- paste0(params$path_pl_results, "/dada2/ref_taxonomy.txt") +ref_tax <- readLines(ref_tax_path) + +db <- "Unknown DB" +for (line in ref_tax){ + if (grepl("Title:", line)) { + db <- sub(".*Title: ", "", line) + } +} + +# Output text db +cat("The taxonomic classification was performed by DADA2 using the database: ", + "\"", db, "\".\n\n", sep = "") + +asv_tax_path <- paste0(params$path_pl_results, "/dada2/ASV_tax_species.tsv") +asv_tax <- read.table(asv_tax_path, header = TRUE, sep = "\t") + +# Calculate the classified numbers/percent of asv +level <- c("Domain", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus") 
+n_asv_unclassified <- c(count(asv_tax, Domain)$n[1], + count(asv_tax, Kingdom)$n[1], + count(asv_tax, Phylum)$n[1], + count(asv_tax, Class)$n[1], + count(asv_tax, Order)$n[1], + count(asv_tax, Family)$n[1], + count(asv_tax, Genus)$n[1]) +n_asv_classified <- n_asv - n_asv_unclassified +p_asv_classified <- round(n_asv_classified / n_asv * 100, 2) + +asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified) + +# Build output string +outputstr <- "DADA2 classified " + +for (row in seq_len(nrow(asv_classi_df))) { + outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified, + " % ASVs at ", asv_classi_df[row, ]$level, " level") + switch(as.character(row), + "6" = outputstr <- paste0(outputstr, " and "), + "7" = outputstr <- paste0(outputstr, ".\n\n"), + outputstr <- paste0(outputstr, ", ")) +} + +# Output Text Classifications +cat(outputstr) + +# Barplot +# Plot +asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level) +ggplot(asv_classi_df, + aes(x = reorder(level, desc(level)), y = p_asv_classified)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("% Classification") + + xlab("Levels") + + coord_flip() + + theme_bw() +``` diff --git a/bin/generate_report.R b/bin/generate_report.R new file mode 100755 index 00000000..79180eac --- /dev/null +++ b/bin/generate_report.R @@ -0,0 +1,70 @@ +#!/usr/bin/env Rscript + +library(rmarkdown) +library(optparse) + + +option_list = list( + make_option(c("-r", "--report"), type="character", default=NULL, help="report template file", metavar="character"), + make_option(c("-o", "--output"), type="character", default="ampliseq_report.html", help="output file name", metavar="character"), + make_option(c("--skip_fastqc"), action="store_true", default=FALSE, help="Trigger to skip fastqc reporting", metavar="logical"), + make_option(c("--skip_cutadapt"), action="store_true", default=FALSE, help="Trigger to skip cutadapt filtering", metavar="logical"), + 
make_option(c("--skip_dada2"), action="store_true", default=FALSE, help="Trigger to skip dada2", metavar="logical"), + make_option(c("--skip_barrnap"), action="store_true", default=FALSE, help="Trigger to skip barrnap ASV filtering", metavar="logical"), + make_option(c("--retain_untrimmed"), action="store_true", default=FALSE, help="Flag to retain the untrimmed sequences", metavar="logical"), + make_option(c("--trunclenf"), type="numeric", default=-1, help="Flag to define truncation in forward strand", metavar="numeric"), + make_option(c("--trunclenr"), type="numeric", default=-1, help="Flag to define truncation in reverse strand", metavar="numeric"), + make_option(c("--trunc_qmin"), type="numeric", default=-1, help="Flag to define truncation via quality measure. Set to -1 if trunclen were given.", metavar="numeric"), + make_option(c("--mqc_plot"), type="character", default=NULL, help="MultiQC plot per sequence quality", metavar="character"), + make_option(c("--ca_sum_path"), type="character", default=NULL, help="cutadapt summary table", metavar="character"), + make_option(c("--dada_filtntrim_args"), type="character", default=NULL, help="DADA2 arguments for filter and trim process", metavar="character"), + make_option(c("--dada_qc_f_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_qc_r_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_pp_qc_f_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_pp_qc_r_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_1_err_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_2_err_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--asv_table_path"), type="character", default=NULL, help="MultiQC 
plots", metavar="character"), + make_option(c("--path_asv_fa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--path_dada2_tab"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_stats_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--path_rrna_arc"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--path_rrna_bac"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--path_rrna_euk"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--path_rrna_mito"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--ref_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--asv_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character") +) + +opt_parser = OptionParser(option_list = option_list) +opt = parse_args(opt_parser) + +rmarkdown::render(opt$report, output_file = opt$output, + params = list( + flag_skip_fastqc = opt$skip_fastqc, + flag_skip_cutadapt = opt$skip_cutadapt, + flag_skip_dada2 = opt$skip_dada2, + flag_skip_barrnap = opt$skip_barrnap, + flag_retain_untrimmed = opt$retain_untrimmed, + flag_trunclenf = opt$trunclenf, + flag_trunclenr = opt$trunclenr, + flag_trunc_qmin = opt$trunc_qmin, + mqc_plot = opt$mqc_plot, + ca_sum_path = opt$ca_sum_path, + dada_filtntrim_args = opt$dada_filtntrim_args, + dada_qc_f_path = opt$dada_qc_f_path, + dada_qc_r_path = opt$dada_qc_r_path, + dada_pp_qc_f_path = opt$dada_pp_qc_f_path, + dada_pp_qc_r_path = opt$dada_pp_qc_r_path, + dada_1_err_path = opt$dada_1_err_path, + dada_2_err_path = opt$dada_2_err_path, + asv_table_path = opt$asv_table_path, + path_asv_fa = opt$path_asv_fa, + path_dada2_tab = opt$path_dada2_tab, + dada_stats_path = 
opt$dada_stats_path, + path_rrna_arc = opt$path_rrna_arc, + path_rrna_bac = opt$path_rrna_bac, + path_rrna_euk = opt$path_rrna_euk, + path_rrna_mito = opt$path_rrna_mito, + ref_tax_path = opt$ref_tax_path, + asv_tax_path = opt$asv_tax_path)) diff --git a/conf/modules.config b/conf/modules.config index d0bf626a..a8c854f2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -598,4 +598,12 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + withName: SUMMARY_REPORT { + publishDir = [ + path: { "${params.outdir}/Summary_Report" }, + mode: params.publish_dir_mode, + pattern: '*.html' + ] + } } diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf new file mode 100644 index 00000000..d2139268 --- /dev/null +++ b/modules/local/summary_report.nf @@ -0,0 +1,63 @@ +process SUMMARY_REPORT { + + label 'process_low' + + container 'tillenglert/ampliseq_report:latest' + //conda (params.enable_conda ? "bioconda:r-markdown==0.8--r3.4.1_1" : null) + //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + // 'https://depot.galaxyproject.org/singularity/r-markdown:0.8--r3.4.1_1' : + // 'quay.io/biocontainers/r-markdown:0.8--r3.4.1_1' }" + + input: + path(report_template) + path(report_styles) + path(mqc_plots) + path(ca_summary) + path(dada_filtntrim_args) + path(dada_fw_qual_stats) + path(dada_rv_qual_stats) + path(dada_pp_fw_qual_stats) + path(dada_pp_rv_qual_stats) + tuple val(meta), path(dada_err_svgs) + + + output: + path "Summary_Report.html" , emit: report + //path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def skip_fastqc = params.skip_fastqc ? "--skip_fastqc" : "" + def skip_cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "" + def skip_dada2 = params.skip_dada_quality ? "--skip_dada2" : "" + def skip_barrnap = params.skip_barrnap ? 
"--skip_barrnap" : "" + def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" + """ + generate_report.R --report $report_template \\ + --output "Summary_Report.html" \\ + --mqc_plot "${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" \\ + --ca_sum_path $ca_summary \\ + --dada_filtntrim_args $dada_filtntrim_args \\ + --dada_qc_f_path $dada_fw_qual_stats \\ + --dada_qc_r_path $dada_rv_qual_stats \\ + --dada_pp_qc_f_path $dada_pp_fw_qual_stats \\ + --dada_pp_qc_r_path $dada_pp_rv_qual_stats \\ + --dada_1_err_path ./1_1.err.svg \\ + --dada_2_err_path ./1_2.err.svg \\ + $skip_fastqc \\ + $skip_cutadapt \\ + $skip_dada2 \\ + $skip_barrnap \\ + $retain_untrimmed \\ + --trunclenf $params.trunclenf \\ + --trunclenr $params.trunclenr \\ + --trunc_qmin $params.trunc_qmin + """ + //--pl_results $results_dir \\ + //cat <<-END_VERSIONS > versions.yml + //"${task.process}": + // R: \$(R --version 2>&1 | sed -n 1p | sed 's/R version //' | sed 's/ (.*//') + //END_VERSIONS +} diff --git a/subworkflows/local/dada2_preprocessing.nf b/subworkflows/local/dada2_preprocessing.nf index fb2b44f3..e2e6bd42 100644 --- a/subworkflows/local/dada2_preprocessing.nf +++ b/subworkflows/local/dada2_preprocessing.nf @@ -118,7 +118,10 @@ workflow DADA2_PREPROCESSING { .set { ch_filt_reads } emit: - reads = ch_filt_reads - logs = DADA2_FILTNTRIM.out.log - versions = ch_versions_dada2_preprocessing + reads = ch_filt_reads + logs = DADA2_FILTNTRIM.out.log + args = DADA2_FILTNTRIM.out.args + qc_svg = DADA2_QUALITY1.out.svg + qc_svg_preprocessed = DADA2_QUALITY2.out.svg + versions = ch_versions_dada2_preprocessing } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 6523d174..8a7952ae 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -153,6 +153,7 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax' include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { 
SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' +include { SUMMARY_REPORT } from '../modules/local/summary_report' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -661,6 +662,27 @@ workflow AMPLISEQ { multiqc_report = MULTIQC.out.report.toList() } + // + // MODULE: Summary Report + // + + SUMMARY_REPORT ( + Channel.fromPath("${baseDir}/assets/report_template.Rmd"), + Channel.fromPath("${baseDir}/assets/report_styles.css"), + MULTIQC.out.plots, //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") + CUTADAPT_WORKFLOW.out.summary.collect(), + DADA2_PREPROCESSING.out.args.first(), + DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_qual_stats.svg"), + DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_qual_stats.svg"), + DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_preprocessed_qual_stats.svg"), + DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_preprocessed_qual_stats.svg"), + DADA2_ERR.out.svg + ) + + + // TODO Versions in Report + //ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) + //Save input in results folder input = file(params.input) if ( is_fasta_input || input.toString().toLowerCase().endsWith("tsv") ) { From a8b916029272400903f3aeddd0f578c7c1d1d6c0 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 24 Mar 2023 15:06:06 +0100 Subject: [PATCH 002/230] fix --skip_cutadapt and --skip_dada_quality --- assets/report_template.Rmd | 22 +++++++++++----------- bin/generate_report.R | 4 ++-- modules/local/summary_report.nf | 13 ++++--------- subworkflows/local/dada2_preprocessing.nf | 9 +++++++-- workflows/ampliseq.nf | 10 +++++----- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index c3b50c0b..2ac37eab 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -15,8 +15,8 @@ output: params: flag_skip_fastqc: FALSE flag_skip_cutadapt: FALSE - flag_skip_dada2: FALSE - 
flag_skip_dada2_: TRUE + flag_skip_dada_quality: FALSE + flag_skip_dada_quality_: TRUE flag_skip_barrnap: FALSE flag_retain_untrimmed: TRUE flag_trunclenf: "" @@ -108,7 +108,7 @@ ggplot(cutadapt_summary, theme_bw() ``` -```{r, eval = !params$flag_skip_dada2, results='asis'} +```{r, eval = !params$flag_skip_dada_quality, results='asis'} dada2_dir <- paste0("../dada2/QC/") cat("## QC using DADA2\n") cat("Further quality checks were performed using the DADA2 package and ", @@ -117,7 +117,7 @@ cat("Further quality checks were performed using the DADA2 package and ", "found [here](", dada2_dir, ").", sep = "") ``` -```{r, eval = !params$flag_skip_dada2, results='asis'} +```{r, eval = !params$flag_skip_dada_quality, results='asis'} if (params$flag_trunc_qmin != -1) { f_and_tr_args <- readLines(params$dada_filtntrim_args) trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", @@ -135,32 +135,32 @@ if (params$flag_trunc_qmin != -1) { } ``` -```{r, eval = !params$flag_skip_dada2, results='asis', out.width="49%", fig.show='hold',fig.align='center'} +```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold',fig.align='center'} # TODO FW or RV may not exist # TODO svg seems to have an error knitr::include_graphics(c(params$dada_qc_f_path,params$dada_qc_r_path)) ``` -```{r, eval = !params$flag_skip_dada2, results='asis', out.width="49%", fig.show='hold',fig.align='center'} +```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold',fig.align='center'} # TODO FW or RV may not exist # TODO same issue, also error in svg knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) ``` -```{r, eval = !params$flag_skip_dada2, results='asis'} +```{r, eval = !params$flag_skip_dada_quality, results='asis'} cat("## Error correction using DADA2\n") cat("Error correction was performed using DADA2 as well and the original ", "plots can be found [here](../dada2/QC/).", sep = "") ``` 
-```{r, eval = !params$flag_skip_dada2, results = 'asis', out.width="49%", fig.show='hold',fig.align='center'} +```{r, eval = !params$flag_skip_dada_quality, results = 'asis', out.width="49%", fig.show='hold',fig.align='center'} # TODO Error profiles per run (Name may change default 1) # TODO paried end produces *_2* otherwise only _1 knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` -```{r, eval = !params$flag_skip_dada2_, results='asis'} +```{r, eval = !params$flag_skip_dada_quality_, results='asis'} # Header cat("## ASV inference using DADA2\n") @@ -182,7 +182,7 @@ cat("The ASVs can be found [here](", path_asv_fa, "). And the corresponding", "be found [here](", path_dada2_tab, ").", sep = "") ``` -```{r, eval = !params$flag_skip_dada2_, results='asis'} +```{r, eval = !params$flag_skip_dada_quality_, results='asis'} # import stats tsv dada_stats_path <- paste0(params$path_pl_results, "/dada2/DADA2_stats.tsv") dada_stats <- read.table(file = dada_stats_path, header = TRUE, sep = "\t") @@ -248,7 +248,7 @@ ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + # Taxonomic Classification -```{r, eval = !params$flag_skip_dada2_, results='asis'} +```{r, eval = !params$flag_skip_dada_quality_, results='asis'} # Header cat("## Taxonomic Classification using DADA2\n") diff --git a/bin/generate_report.R b/bin/generate_report.R index 79180eac..d7acf830 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -9,7 +9,7 @@ option_list = list( make_option(c("-o", "--output"), type="character", default="ampliseq_report.html", help="output file name", metavar="character"), make_option(c("--skip_fastqc"), action="store_true", default=FALSE, help="Trigger to skip fastqc reporting", metavar="logical"), make_option(c("--skip_cutadapt"), action="store_true", default=FALSE, help="Trigger to skip cutadapt filtering", metavar="logical"), - make_option(c("--skip_dada2"), action="store_true", default=FALSE, help="Trigger to skip 
dada2", metavar="logical"), + make_option(c("--skip_dada_quality"), action="store_true", default=FALSE, help="Trigger to skip dada2 quality plotting", metavar="logical"), make_option(c("--skip_barrnap"), action="store_true", default=FALSE, help="Trigger to skip barrnap ASV filtering", metavar="logical"), make_option(c("--retain_untrimmed"), action="store_true", default=FALSE, help="Flag to retain the untrimmed sequences", metavar="logical"), make_option(c("--trunclenf"), type="numeric", default=-1, help="Flag to define truncation in forward strand", metavar="numeric"), @@ -43,7 +43,7 @@ rmarkdown::render(opt$report, output_file = opt$output, params = list( flag_skip_fastqc = opt$skip_fastqc, flag_skip_cutadapt = opt$skip_cutadapt, - flag_skip_dada2 = opt$skip_dada2, + flag_skip_dada_quality = opt$skip_dada_quality, flag_skip_barrnap = opt$skip_barrnap, flag_retain_untrimmed = opt$retain_untrimmed, flag_trunclenf = opt$trunclenf, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index d2139268..033b32b9 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -30,25 +30,20 @@ process SUMMARY_REPORT { script: def skip_fastqc = params.skip_fastqc ? "--skip_fastqc" : "" - def skip_cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "" - def skip_dada2 = params.skip_dada_quality ? "--skip_dada2" : "" + def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "--ca_sum_path $ca_summary" + def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats" def skip_barrnap = params.skip_barrnap ? "--skip_barrnap" : "" def retain_untrimmed = params.retain_untrimmed ? 
"--retain_untrimmed" : "" """ generate_report.R --report $report_template \\ --output "Summary_Report.html" \\ --mqc_plot "${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" \\ - --ca_sum_path $ca_summary \\ --dada_filtntrim_args $dada_filtntrim_args \\ - --dada_qc_f_path $dada_fw_qual_stats \\ - --dada_qc_r_path $dada_rv_qual_stats \\ - --dada_pp_qc_f_path $dada_pp_fw_qual_stats \\ - --dada_pp_qc_r_path $dada_pp_rv_qual_stats \\ --dada_1_err_path ./1_1.err.svg \\ --dada_2_err_path ./1_2.err.svg \\ $skip_fastqc \\ - $skip_cutadapt \\ - $skip_dada2 \\ + $cutadapt \\ + $dada_quality \\ $skip_barrnap \\ $retain_untrimmed \\ --trunclenf $params.trunclenf \\ diff --git a/subworkflows/local/dada2_preprocessing.nf b/subworkflows/local/dada2_preprocessing.nf index e2e6bd42..181e9c56 100644 --- a/subworkflows/local/dada2_preprocessing.nf +++ b/subworkflows/local/dada2_preprocessing.nf @@ -41,10 +41,12 @@ workflow DADA2_PREPROCESSING { .set { ch_all_trimmed_reads } } + ch_DADA2_QUALITY1_SVG = Channel.empty() if ( !params.skip_dada_quality ) { DADA2_QUALITY1 ( ch_all_trimmed_reads.dump(tag: 'into_dada2_quality') ) ch_versions_dada2_preprocessing = ch_versions_dada2_preprocessing.mix(DADA2_QUALITY1.out.versions) DADA2_QUALITY1.out.warning.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn it.baseName.toString().replace("WARNING ","DADA2_QUALITY1: ") } + ch_DADA2_QUALITY1_SVG = DADA2_QUALITY1.out.svg } //find truncation values in case they are not supplied @@ -94,9 +96,12 @@ workflow DADA2_PREPROCESSING { .mix ( ch_all_preprocessed_rv ) .set { ch_all_preprocessed_reads } } + + ch_DADA2_QUALITY2_SVG = Channel.empty() if ( !params.skip_dada_quality ) { DADA2_QUALITY2 ( ch_all_preprocessed_reads.dump(tag: 'into_dada2_quality2') ) DADA2_QUALITY2.out.warning.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn it.baseName.toString().replace("WARNING ","DADA2_QUALITY2: ") } + ch_DADA2_QUALITY2_SVG = DADA2_QUALITY2.out.svg } 
//group by sequencing run @@ -121,7 +126,7 @@ workflow DADA2_PREPROCESSING { reads = ch_filt_reads logs = DADA2_FILTNTRIM.out.log args = DADA2_FILTNTRIM.out.args - qc_svg = DADA2_QUALITY1.out.svg - qc_svg_preprocessed = DADA2_QUALITY2.out.svg + qc_svg = ch_DADA2_QUALITY1_SVG + qc_svg_preprocessed = ch_DADA2_QUALITY2_SVG versions = ch_versions_dada2_preprocessing } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 8a7952ae..1c82adc9 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -670,12 +670,12 @@ workflow AMPLISEQ { Channel.fromPath("${baseDir}/assets/report_template.Rmd"), Channel.fromPath("${baseDir}/assets/report_styles.css"), MULTIQC.out.plots, //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") - CUTADAPT_WORKFLOW.out.summary.collect(), + !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect() : [], DADA2_PREPROCESSING.out.args.first(), - DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_qual_stats.svg"), - DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_qual_stats.svg"), - DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_preprocessed_qual_stats.svg"), - DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_preprocessed_qual_stats.svg"), + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_qual_stats.svg") : [], + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_qual_stats.svg") : [], + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_preprocessed_qual_stats.svg") : [], + !params.skip_dada_quality ? 
DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_preprocessed_qual_stats.svg") : [], DADA2_ERR.out.svg ) From 411188b67b5a630cf0bf70bc3d0bca061ffcbfbb Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 24 Mar 2023 16:15:25 +0100 Subject: [PATCH 003/230] fix dada2 error profiles --- assets/report_template.Rmd | 5 +---- modules/local/summary_report.nf | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 2ac37eab..56854542 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -153,10 +153,7 @@ cat("Error correction was performed using DADA2 as well and the original ", "plots can be found [here](../dada2/QC/).", sep = "") ``` -```{r, eval = !params$flag_skip_dada_quality, results = 'asis', out.width="49%", fig.show='hold',fig.align='center'} -# TODO Error profiles per run (Name may change default 1) -# TODO paried end produces *_2* otherwise only _1 - +```{r, results = 'asis', out.width="49%", fig.show='hold',fig.align='center'} knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 033b32b9..b1075362 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -34,13 +34,13 @@ process SUMMARY_REPORT { def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats" def skip_barrnap = params.skip_barrnap ? "--skip_barrnap" : "" def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" + def dada_err = meta.single_end ? 
"--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" """ generate_report.R --report $report_template \\ --output "Summary_Report.html" \\ --mqc_plot "${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" \\ --dada_filtntrim_args $dada_filtntrim_args \\ - --dada_1_err_path ./1_1.err.svg \\ - --dada_2_err_path ./1_2.err.svg \\ + $dada_err \\ $skip_fastqc \\ $cutadapt \\ $dada_quality \\ From b3655d5ef9084d9d27bc17baa9ad4d04921e997c Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 24 Mar 2023 16:25:35 +0100 Subject: [PATCH 004/230] fix dada2 quality profiles for single end data --- assets/report_template.Rmd | 6 ++---- modules/local/summary_report.nf | 4 +++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 56854542..785b88f0 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -113,7 +113,7 @@ dada2_dir <- paste0("../dada2/QC/") cat("## QC using DADA2\n") cat("Further quality checks were performed using the DADA2 package and ", "forward and reverse quality stats are displayed as well as the ", - "respective preprocessed quality stats. The original plots can be", + "respective preprocessed quality stats. 
The original plots can be ", "found [here](", dada2_dir, ").", sep = "") ``` @@ -136,18 +136,16 @@ if (params$flag_trunc_qmin != -1) { ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold',fig.align='center'} -# TODO FW or RV may not exist # TODO svg seems to have an error knitr::include_graphics(c(params$dada_qc_f_path,params$dada_qc_r_path)) ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold',fig.align='center'} -# TODO FW or RV may not exist # TODO same issue, also error in svg knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis'} +```{r, results='asis'} cat("## Error correction using DADA2\n") cat("Error correction was performed using DADA2 as well and the original ", "plots can be found [here](../dada2/QC/).", sep = "") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index b1075362..5cf34abd 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -31,7 +31,9 @@ process SUMMARY_REPORT { script: def skip_fastqc = params.skip_fastqc ? "--skip_fastqc" : "" def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "--ca_sum_path $ca_summary" - def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats" + def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : + meta.single_end ? "--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats" : + "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats" def skip_barrnap = params.skip_barrnap ? "--skip_barrnap" : "" def retain_untrimmed = params.retain_untrimmed ? 
"--retain_untrimmed" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" From 14f89cddfa5f01f46648b73d89a84d5f43183341 Mon Sep 17 00:00:00 2001 From: Till Englert Date: Mon, 3 Apr 2023 09:57:48 +0200 Subject: [PATCH 005/230] Adding more content to the report --- assets/report_template.Rmd | 114 +++++++++++++++++++++++--------- bin/generate_report.R | 8 ++- modules/local/summary_report.nf | 23 +++++-- workflows/ampliseq.nf | 11 ++- 4 files changed, 112 insertions(+), 44 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 785b88f0..2f1bc9c9 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -13,16 +13,18 @@ output: pdf_document: true #bibliography: ./references.bibtex params: + # flags and arguments flag_skip_fastqc: FALSE flag_skip_cutadapt: FALSE flag_skip_dada_quality: FALSE - flag_skip_dada_quality_: TRUE flag_skip_barrnap: FALSE + flag_skip_taxonomy: FALSE flag_retain_untrimmed: TRUE - flag_trunclenf: "" - flag_trunclenr: "" - flag_trunc_qmin: "" + trunclenf: "" + trunclenr: "" + trunc_qmin: "" + # file paths mqc_plot: "" ca_sum_path: "" dada_filtntrim_args: "" @@ -118,7 +120,7 @@ cat("Further quality checks were performed using the DADA2 package and ", ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} -if (params$flag_trunc_qmin != -1) { +if (params$trunc_qmin != -1) { f_and_tr_args <- readLines(params$dada_filtntrim_args) trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", f_and_tr_args), ", ") @@ -129,8 +131,8 @@ if (params$flag_trunc_qmin != -1) { "forward reads at ", tr_len_f, " bp and reverse ", "reads at ", tr_len_r, " bp.", sep = "") } else { - trunclen <- cat("Forward reads were trimmed at ", params$flag_trunclenf, - " bp and reverse reads were trimmed at ", params$flag_trunclenr, + trunclen <- cat("Forward reads were trimmed at ", params$trunclenf, + " bp and 
reverse reads were trimmed at ", params$trunclenr, " bp.", sep = "") } ``` @@ -155,32 +157,26 @@ cat("Error correction was performed using DADA2 as well and the original ", knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` -```{r, eval = !params$flag_skip_dada_quality_, results='asis'} +```{r, eval = !params$flag_skip_dada_quality, results='asis'} # Header cat("## ASV inference using DADA2\n") -# import asv tsv -asv_table_path <- paste0(params$path_pl_results, "/dada2/ASV_table.tsv") -asv_table <- read.table(file = asv_table_path, header = TRUE, sep = "\t") +#import asv table +asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t") n_asv <- length(asv_table$ASV_ID) -# Define additional table paths -path_asv_fa <- paste0(params$path_pl_results, "/dada2/ASV_seqs.fasta") -path_dada2_tab <- paste0(params$path_pl_results, "/dada2/DADA2_table.tsv") - # Output text cat(n_asv, "amplicon sequence variants (ASVs) were obtained across all samples. ") -cat("The ASVs can be found [here](", path_asv_fa, "). And the corresponding", +cat("The ASVs can be found [here](", params$path_asv_fa, "). And the corresponding", " quantification of the ASVs across samples can be found ", - "[here](", path_asv_path, "). An extensive table containing both can ", - "be found [here](", path_dada2_tab, ").", sep = "") + "[here](", params$path_asv_path, "). 
An extensive table containing both can ", + "be found [here](", params$path_dada2_tab, ").", sep = "") ``` -```{r, eval = !params$flag_skip_dada_quality_, results='asis'} +```{r, eval = !params$flag_skip_dada_quality, results='asis'} # import stats tsv -dada_stats_path <- paste0(params$path_pl_results, "/dada2/DADA2_stats.tsv") -dada_stats <- read.table(file = dada_stats_path, header = TRUE, sep = "\t") +dada_stats <- read.table(file = params$dada_stats_path, header = TRUE, sep = "\t") # Display table datatable(dada_stats, options = list( @@ -239,16 +235,62 @@ ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + # Filtering of ASVs +```{r, eval = !params$flag_skip_barrnap, results='asis'} +# Header +cat("## ASV filtering using Barrnap\n") +cat("Barrnap classifies the ASVs into the origin domain (including ", + "mitochondiral origin). Using this classification the ASVs can ", + "be filtered by sample origin.\n\n", sep = "") + +# Read the barrnap files and count the lines +l_paths_rrna <- c(params$path_rrna_arc, params$path_rrna_bac, params$path_rrna_euk, params$path_rrna_mito) +n_rrna <- c() +for (path_rrna in l_paths_rrna) { + n_rrna <- append(n_rrna, length(readLines(path_rrna)) - 1) +} + +label <- c("Archea", "Bacteria", "Eukaryotes", "Mitochondria") +p_rrna <- round(n_rrna / n_asv * 100, 2) +barrnap_df <- data.frame(label, n_rrna, p_rrna) + +# Build outputtext +outputstr <- "Barrnap classified " + +for (row in seq_len(nrow(barrnap_df))) { + outputstr <- paste0(outputstr, barrnap_df[row, ]$n_rrna, " (", + barrnap_df[row, ]$p_rrna, " %) ASVs similar to ", + barrnap_df[row, ]$label) + switch(as.character(row), + "3" = outputstr <- paste0(outputstr, " and "), + "4" = outputstr <- paste0(outputstr, ", respectively.\n\n"), + outputstr <- paste0(outputstr, ", ")) +} + +# Output text +cat(outputstr) + +# Barplot +# Fix order of bars +barrnap_df$label <- factor(barrnap_df$label, levels = barrnap_df$label) + +# Plot +ggplot(barrnap_df, + aes(x = 
reorder(label, desc(label)), y = p_rrna)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("% Classification") + + xlab("rRNA origins") + + coord_flip() + + theme_bw() +``` # Taxonomic Classification -```{r, eval = !params$flag_skip_dada_quality_, results='asis'} +```{r, eval = !params$flag_skip_taxonomy, results='asis'} # Header cat("## Taxonomic Classification using DADA2\n") -ref_tax_path <- paste0(params$path_pl_results, "/dada2/ref_taxonomy.txt") -ref_tax <- readLines(ref_tax_path) +ref_tax <- readLines(params$ref_tax_path) db <- "Unknown DB" for (line in ref_tax){ @@ -261,20 +303,26 @@ for (line in ref_tax){ cat("The taxonomic classification was performed by DADA2 using the database: ", "\"", db, "\".\n\n", sep = "") -asv_tax_path <- paste0(params$path_pl_results, "/dada2/ASV_tax_species.tsv") -asv_tax <- read.table(asv_tax_path, header = TRUE, sep = "\t") +asv_tax <- read.table(params$asv_tax_path, header = TRUE, sep = "\t") # Calculate the classified numbers/percent of asv -level <- c("Domain", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus") -n_asv_unclassified <- c(count(asv_tax, Domain)$n[1], - count(asv_tax, Kingdom)$n[1], +level <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus") + +# Catch 100% Kingdom assignment +if (count(asv_tax, Kingdom)$n[1] == nrow(asv_tax)){ + n_kingdom = 0 +} else { + n_kingdom = count(asv_tax, Kingdom)$n[1] +} +n_asv_tax = nrow(asv_tax) +n_asv_unclassified <- c(n_kingdom, count(asv_tax, Phylum)$n[1], count(asv_tax, Class)$n[1], count(asv_tax, Order)$n[1], count(asv_tax, Family)$n[1], count(asv_tax, Genus)$n[1]) -n_asv_classified <- n_asv - n_asv_unclassified -p_asv_classified <- round(n_asv_classified / n_asv * 100, 2) +n_asv_classified <- n_asv_tax - n_asv_unclassified +p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2) asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified) @@ -285,8 +333,8 @@ for (row in seq_len(nrow(asv_classi_df))) { 
outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified, " % ASVs at ", asv_classi_df[row, ]$level, " level") switch(as.character(row), - "6" = outputstr <- paste0(outputstr, " and "), - "7" = outputstr <- paste0(outputstr, ".\n\n"), + "5" = outputstr <- paste0(outputstr, " and "), + "6" = outputstr <- paste0(outputstr, ".\n\n"), outputstr <- paste0(outputstr, ", ")) } diff --git a/bin/generate_report.R b/bin/generate_report.R index d7acf830..fbe21ec9 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -11,6 +11,7 @@ option_list = list( make_option(c("--skip_cutadapt"), action="store_true", default=FALSE, help="Trigger to skip cutadapt filtering", metavar="logical"), make_option(c("--skip_dada_quality"), action="store_true", default=FALSE, help="Trigger to skip dada2 quality plotting", metavar="logical"), make_option(c("--skip_barrnap"), action="store_true", default=FALSE, help="Trigger to skip barrnap ASV filtering", metavar="logical"), + make_option(c("--skip_taxonomy"), action="store_true", default=FALSE, help="Trigger to skip taxonomic classification", metavar="logical"), make_option(c("--retain_untrimmed"), action="store_true", default=FALSE, help="Flag to retain the untrimmed sequences", metavar="logical"), make_option(c("--trunclenf"), type="numeric", default=-1, help="Flag to define truncation in forward strand", metavar="numeric"), make_option(c("--trunclenr"), type="numeric", default=-1, help="Flag to define truncation in reverse strand", metavar="numeric"), @@ -45,10 +46,11 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_skip_cutadapt = opt$skip_cutadapt, flag_skip_dada_quality = opt$skip_dada_quality, flag_skip_barrnap = opt$skip_barrnap, + flag_skip_taxonomy = opt$skip_taxonomy, flag_retain_untrimmed = opt$retain_untrimmed, - flag_trunclenf = opt$trunclenf, - flag_trunclenr = opt$trunclenr, - flag_trunc_qmin = opt$trunc_qmin, + trunclenf = opt$trunclenf, + trunclenr = opt$trunclenr, + trunc_qmin = 
opt$trunc_qmin, mqc_plot = opt$mqc_plot, ca_sum_path = opt$ca_sum_path, dada_filtntrim_args = opt$dada_filtntrim_args, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 5cf34abd..9261df4e 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -19,6 +19,13 @@ process SUMMARY_REPORT { path(dada_pp_fw_qual_stats) path(dada_pp_rv_qual_stats) tuple val(meta), path(dada_err_svgs) + path(dada_asv_table) + path(dada_asv_fa) + path(dada_tab) + path(dada_stats) + path(barrnap_gff) + path(tax_reference) + path(asv_tax) output: @@ -29,24 +36,26 @@ process SUMMARY_REPORT { task.ext.when == null || task.ext.when script: - def skip_fastqc = params.skip_fastqc ? "--skip_fastqc" : "" + def fastqc = params.skip_fastqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "--ca_sum_path $ca_summary" def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : meta.single_end ? "--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats" : - "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats" + "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats --asv_table_path $dada_asv_table --path_asv_fa $dada_asv_fa --path_dada2_tab $dada_tab --dada_stats_path $dada_stats" def skip_barrnap = params.skip_barrnap ? "--skip_barrnap" : "" def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" + def barrnap = params.skip_barrnap ? 
"--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]}" + def taxonomy = params.skip_taxonomy ? "--skip_taxonomy" : "--ref_tax_path $tax_reference --asv_tax_path $asv_tax" """ generate_report.R --report $report_template \\ --output "Summary_Report.html" \\ - --mqc_plot "${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" \\ - --dada_filtntrim_args $dada_filtntrim_args \\ - $dada_err \\ - $skip_fastqc \\ + $fastqc \\ $cutadapt \\ $dada_quality \\ - $skip_barrnap \\ + --dada_filtntrim_args $dada_filtntrim_args \\ + $dada_err \\ + $barrnap \\ + $taxonomy \\ $retain_untrimmed \\ --trunclenf $params.trunclenf \\ --trunclenr $params.trunclenr \\ diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 1c82adc9..b373e32d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -666,6 +666,8 @@ workflow AMPLISEQ { // MODULE: Summary Report // + ch_dada_taxonomy = !params.skip_dada_addspecies ? DADA2_ADDSPECIES.out.tsv : DADA2_TAXONOMY.out.tsv + SUMMARY_REPORT ( Channel.fromPath("${baseDir}/assets/report_template.Rmd"), Channel.fromPath("${baseDir}/assets/report_styles.css"), @@ -676,7 +678,14 @@ workflow AMPLISEQ { !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_qual_stats.svg") : [], !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_preprocessed_qual_stats.svg") : [], !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_preprocessed_qual_stats.svg") : [], - DADA2_ERR.out.svg + DADA2_ERR.out.svg, + DADA2_MERGE.out.asv, + DADA2_MERGE.out.fasta, + DADA2_MERGE.out.dada2asv, + DADA2_MERGE.out.dada2stats, + !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], + !params.skip_taxonomy ? FORMAT_TAXONOMY.out.ref_tax_info : [], + !params.skip_taxonomy ? 
ch_dada_taxonomy : [] ) From 2614ac7b384395bd21fe63758bd6e13399973857 Mon Sep 17 00:00:00 2001 From: Till Englert Date: Mon, 3 Apr 2023 10:14:59 +0200 Subject: [PATCH 006/230] Small refactoring and fix skip_taxonomy --- assets/report_template.Rmd | 14 ++++++-------- workflows/ampliseq.nf | 4 +--- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 2f1bc9c9..4260dd31 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -137,23 +137,21 @@ if (params$trunc_qmin != -1) { } ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold',fig.align='center'} +```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold', fig.align='center'} # TODO svg seems to have an error knitr::include_graphics(c(params$dada_qc_f_path,params$dada_qc_r_path)) ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold',fig.align='center'} +```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold', fig.align='center'} # TODO same issue, also error in svg knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) ``` -```{r, results='asis'} -cat("## Error correction using DADA2\n") -cat("Error correction was performed using DADA2 as well and the original ", - "plots can be found [here](../dada2/QC/).", sep = "") -``` +## Error correction using DADA2 + +Error correction was performed using DADA2 as well and the originalplots can be found [here](../dada2/QC/). 
-```{r, results = 'asis', out.width="49%", fig.show='hold',fig.align='center'} +```{r, results = 'asis', out.width="49%", fig.show='hold', fig.align='center'} knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index b373e32d..fcfa2b94 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -666,8 +666,6 @@ workflow AMPLISEQ { // MODULE: Summary Report // - ch_dada_taxonomy = !params.skip_dada_addspecies ? DADA2_ADDSPECIES.out.tsv : DADA2_TAXONOMY.out.tsv - SUMMARY_REPORT ( Channel.fromPath("${baseDir}/assets/report_template.Rmd"), Channel.fromPath("${baseDir}/assets/report_styles.css"), @@ -685,7 +683,7 @@ workflow AMPLISEQ { DADA2_MERGE.out.dada2stats, !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], !params.skip_taxonomy ? FORMAT_TAXONOMY.out.ref_tax_info : [], - !params.skip_taxonomy ? ch_dada_taxonomy : [] + !params.skip_taxonomy ? !params.skip_dada_addspecies ? DADA2_ADDSPECIES.out.tsv : DADA2_TAXONOMY.out.tsv : [] ) From 414b45aa930d98786102f3b4708ce861489c4b7c Mon Sep 17 00:00:00 2001 From: Till Englert Date: Mon, 3 Apr 2023 10:59:31 +0200 Subject: [PATCH 007/230] fix custom reference db --- assets/report_template.Rmd | 27 +++++++++++++++++---------- bin/generate_report.R | 8 +++++--- modules/local/summary_report.nf | 3 ++- workflows/ampliseq.nf | 3 ++- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 4260dd31..e2091eb4 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -20,6 +20,7 @@ params: flag_skip_barrnap: FALSE flag_skip_taxonomy: FALSE flag_retain_untrimmed: TRUE + flag_ref_tax_user: FALSE trunclenf: "" trunclenr: "" trunc_qmin: "" @@ -288,18 +289,24 @@ ggplot(barrnap_df, # Header cat("## Taxonomic Classification using DADA2\n") -ref_tax <- readLines(params$ref_tax_path) +if (!params$flag_ref_tax_user) { + ref_tax <- 
readLines(params$ref_tax_path) -db <- "Unknown DB" -for (line in ref_tax){ - if (grepl("Title:", line)) { - db <- sub(".*Title: ", "", line) - } -} + db <- "Unknown DB" + for (line in ref_tax){ + if (grepl("Title:", line)) { + db <- sub(".*Title: ", "", line) + } + } -# Output text db -cat("The taxonomic classification was performed by DADA2 using the database: ", - "\"", db, "\".\n\n", sep = "") + # Output text db + cat("The taxonomic classification was performed by DADA2 using the database: ", + "\"", db, "\".\n\n", sep = "") +} else { + # Output text db + cat("The taxonomic classification was performed by DADA2 using a custom database ", + "provided by the user.\n\n", sep = "") +} asv_tax <- read.table(params$asv_tax_path, header = TRUE, sep = "\t") diff --git a/bin/generate_report.R b/bin/generate_report.R index fbe21ec9..9d1a342b 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -13,9 +13,10 @@ option_list = list( make_option(c("--skip_barrnap"), action="store_true", default=FALSE, help="Trigger to skip barrnap ASV filtering", metavar="logical"), make_option(c("--skip_taxonomy"), action="store_true", default=FALSE, help="Trigger to skip taxonomic classification", metavar="logical"), make_option(c("--retain_untrimmed"), action="store_true", default=FALSE, help="Flag to retain the untrimmed sequences", metavar="logical"), - make_option(c("--trunclenf"), type="numeric", default=-1, help="Flag to define truncation in forward strand", metavar="numeric"), - make_option(c("--trunclenr"), type="numeric", default=-1, help="Flag to define truncation in reverse strand", metavar="numeric"), - make_option(c("--trunc_qmin"), type="numeric", default=-1, help="Flag to define truncation via quality measure. 
Set to -1 if trunclen were given.", metavar="numeric"), + make_option(c("--ref_tax_user"), action="store_true", default=FALSE, help="Flag that user provided custom db", metavar="logical"), + make_option(c("--trunclenf"), type="numeric", default=-1, help="Parameter to define truncation in forward strand", metavar="numeric"), + make_option(c("--trunclenr"), type="numeric", default=-1, help="Parameter to define truncation in reverse strand", metavar="numeric"), + make_option(c("--trunc_qmin"), type="numeric", default=-1, help="Parameter to define truncation via quality measure. Set to -1 if trunclen were given.", metavar="numeric"), make_option(c("--mqc_plot"), type="character", default=NULL, help="MultiQC plot per sequence quality", metavar="character"), make_option(c("--ca_sum_path"), type="character", default=NULL, help="cutadapt summary table", metavar="character"), make_option(c("--dada_filtntrim_args"), type="character", default=NULL, help="DADA2 arguments for filter and trim process", metavar="character"), @@ -48,6 +49,7 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_skip_barrnap = opt$skip_barrnap, flag_skip_taxonomy = opt$skip_taxonomy, flag_retain_untrimmed = opt$retain_untrimmed, + flag_ref_tax_user = opt$ref_tax_user, trunclenf = opt$trunclenf, trunclenr = opt$trunclenr, trunc_qmin = opt$trunc_qmin, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 9261df4e..fc6f3105 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -45,7 +45,8 @@ process SUMMARY_REPORT { def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? 
"--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]}" - def taxonomy = params.skip_taxonomy ? "--skip_taxonomy" : "--ref_tax_path $tax_reference --asv_tax_path $asv_tax" + def taxonomy = params.skip_taxonomy ? "--skip_taxonomy" : + params.dada_ref_tax_custom ? "--ref_tax_user --asv_tax_path $asv_tax" : "--ref_tax_path $tax_reference --asv_tax_path $asv_tax" """ generate_report.R --report $report_template \\ --output "Summary_Report.html" \\ diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index fcfa2b94..d85538c7 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -682,7 +682,8 @@ workflow AMPLISEQ { DADA2_MERGE.out.dada2asv, DADA2_MERGE.out.dada2stats, !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], - !params.skip_taxonomy ? FORMAT_TAXONOMY.out.ref_tax_info : [], + // TODO custom ref db + !params.skip_taxonomy ? !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [] : [], !params.skip_taxonomy ? !params.skip_dada_addspecies ? DADA2_ADDSPECIES.out.tsv : DADA2_TAXONOMY.out.tsv : [] ) From 0b0e9e1ac3fb93d97af369dc2f4747a98609cd2e Mon Sep 17 00:00:00 2001 From: Till Englert Date: Mon, 3 Apr 2023 11:09:25 +0200 Subject: [PATCH 008/230] fix dada quality single end --- modules/local/summary_report.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index fc6f3105..d6025061 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -39,7 +39,7 @@ process SUMMARY_REPORT { def fastqc = params.skip_fastqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "--ca_sum_path $ca_summary" def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : - meta.single_end ? 
"--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats" : + meta.single_end ? "--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --asv_table_path $dada_asv_table --path_asv_fa $dada_asv_fa --path_dada2_tab $dada_tab --dada_stats_path $dada_stats" : "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats --asv_table_path $dada_asv_table --path_asv_fa $dada_asv_fa --path_dada2_tab $dada_tab --dada_stats_path $dada_stats" def skip_barrnap = params.skip_barrnap ? "--skip_barrnap" : "" def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" From 2d2943b5b19205f5a42b25f2215c1c09d4b07b9b Mon Sep 17 00:00:00 2001 From: Till Englert Date: Mon, 3 Apr 2023 14:23:52 +0200 Subject: [PATCH 009/230] fix single end dada2 stats table --- assets/report_template.Rmd | 71 ++++++++++++++++++++++----------- bin/generate_report.R | 2 + modules/local/summary_report.nf | 3 +- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index e2091eb4..6993cc90 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -21,6 +21,7 @@ params: flag_skip_taxonomy: FALSE flag_retain_untrimmed: TRUE flag_ref_tax_user: FALSE + flag_single_end: FALSE trunclenf: "" trunclenr: "" trunc_qmin: "" @@ -173,6 +174,10 @@ cat("The ASVs can be found [here](", params$path_asv_fa, "). 
And the correspondi "be found [here](", params$path_dada2_tab, ").", sep = "") ``` +```{r} +print(params$flag_single_end) +``` + ```{r, eval = !params$flag_skip_dada_quality, results='asis'} # import stats tsv dada_stats <- read.table(file = params$dada_stats_path, header = TRUE, sep = "\t") @@ -186,27 +191,51 @@ datatable(dada_stats, options = list( # Stacked barchart to num of reads # Calc exluded asvs and transform all cols to percent -dada_stats_ex <- data.frame(sample = dada_stats$sample, - DADA2_input = dada_stats$DADA2_input, - filtered = dada_stats$DADA2_input-dada_stats$filtered, - denoisedF = dada_stats$filtered-dada_stats$denoisedF, - denoisedR = dada_stats$denoisedF-dada_stats$denoisedR, - merged = dada_stats$denoisedR-dada_stats$merged, - nonchim = dada_stats$merged-dada_stats$nonchim, - analysis = dada_stats$nonchim) - -dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input, 2)) - -# Stack columns for both stacked barcharts -n_samples <- length(dada_stats_p$sample) -samples_t <- c(rep(dada_stats_p$sample, 6)) -steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoisedF", n_samples), + +if ( params$flag_single_end ) { + # single end + dada_stats_ex <- data.frame(sample = dada_stats$sample, + input = dada_stats$input, + filtered = dada_stats$input-dada_stats$filtered, + denoised = dada_stats$filtered-dada_stats$denoised, + nonchim = dada_stats$denoised-dada_stats$nonchim, + analysis = dada_stats$nonchim) + dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:6]/dada_stats_ex$input, 2)) + n_samples <- length(dada_stats_p$sample) + # Stack columns for both stacked barcharts + samples_t <- c(rep(dada_stats_p$sample, 4)) + steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoised", n_samples), + rep("excluded by nonchim", n_samples), rep("ready for analysis", n_samples)) + # stack the column for absolute number of asvs + asvs_abs_t <- 
as.array(flatten_dbl(dada_stats_ex[3:6])) + dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) + # stack the column for percentage of asvs + asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:6])) + dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) +} else { + # paired end + dada_stats_ex <- data.frame(sample = dada_stats$sample, + DADA2_input = dada_stats$DADA2_input, + filtered = dada_stats$DADA2_input-dada_stats$filtered, + denoisedF = dada_stats$filtered-dada_stats$denoisedF, + denoisedR = dada_stats$denoisedF-dada_stats$denoisedR, + merged = dada_stats$denoisedR-dada_stats$merged, + nonchim = dada_stats$merged-dada_stats$nonchim, + analysis = dada_stats$nonchim) + dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input, 2)) + # Stack columns for both stacked barcharts + n_samples <- length(dada_stats_p$sample) + samples_t <- c(rep(dada_stats_p$sample, 6)) + steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoisedF", n_samples), rep("excluded by denoisedR", n_samples), rep("excluded by merged", n_samples), rep("excluded by nonchim", n_samples), rep("ready for analysis", n_samples)) - -# stack the column for absolute number of asvs -asvs_abs_t <- as.array(flatten_dbl(dada_stats_ex[3:8])) -dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) + # stack the column for absolute number of asvs + asvs_abs_t <- as.array(flatten_dbl(dada_stats_ex[3:8])) + dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) + # stack the column for percentage of asvs + asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:8])) + dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) +} # Plot dada_stats_ex_t$steps_t <- factor(dada_stats_ex_t$steps_t, levels=unique(dada_stats_ex_t$steps_t)) @@ -217,10 +246,6 @@ ggplot(dada_stats_ex_t, aes(fill = steps_t, y = asvs_abs_t, x = samples_t)) + coord_flip() + scale_fill_brewer("Filtering Steps", palette = "Spectral") -# stack the column 
for percentage of asvs -asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:8])) -dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) - # Plot dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stats_p_t$steps_t)) ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + diff --git a/bin/generate_report.R b/bin/generate_report.R index 9d1a342b..088ed16a 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -14,6 +14,7 @@ option_list = list( make_option(c("--skip_taxonomy"), action="store_true", default=FALSE, help="Trigger to skip taxonomic classification", metavar="logical"), make_option(c("--retain_untrimmed"), action="store_true", default=FALSE, help="Flag to retain the untrimmed sequences", metavar="logical"), make_option(c("--ref_tax_user"), action="store_true", default=FALSE, help="Flag that user provided custom db", metavar="logical"), + make_option(c("--single_end"), action="store_true", default=FALSE, help="Flag if single end data is used", metavar="logical"), make_option(c("--trunclenf"), type="numeric", default=-1, help="Parameter to define truncation in forward strand", metavar="numeric"), make_option(c("--trunclenr"), type="numeric", default=-1, help="Parameter to define truncation in reverse strand", metavar="numeric"), make_option(c("--trunc_qmin"), type="numeric", default=-1, help="Parameter to define truncation via quality measure. 
Set to -1 if trunclen were given.", metavar="numeric"), @@ -50,6 +51,7 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_skip_taxonomy = opt$skip_taxonomy, flag_retain_untrimmed = opt$retain_untrimmed, flag_ref_tax_user = opt$ref_tax_user, + flag_single_end = opt$single_end, trunclenf = opt$trunclenf, trunclenr = opt$trunclenr, trunc_qmin = opt$trunc_qmin, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index d6025061..06f0de79 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -41,8 +41,8 @@ process SUMMARY_REPORT { def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : meta.single_end ? "--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --asv_table_path $dada_asv_table --path_asv_fa $dada_asv_fa --path_dada2_tab $dada_tab --dada_stats_path $dada_stats" : "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats --asv_table_path $dada_asv_table --path_asv_fa $dada_asv_fa --path_dada2_tab $dada_tab --dada_stats_path $dada_stats" - def skip_barrnap = params.skip_barrnap ? "--skip_barrnap" : "" def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" + def single_end = meta.single_end ? "--single_end" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]}" def taxonomy = params.skip_taxonomy ? 
"--skip_taxonomy" : @@ -58,6 +58,7 @@ process SUMMARY_REPORT { $barrnap \\ $taxonomy \\ $retain_untrimmed \\ + $single_end \\ --trunclenf $params.trunclenf \\ --trunclenr $params.trunclenr \\ --trunc_qmin $params.trunc_qmin From ba5f207acc855dbb0df31beba5c46fd7b64e9758 Mon Sep 17 00:00:00 2001 From: Till Englert Date: Tue, 4 Apr 2023 07:10:44 +0200 Subject: [PATCH 010/230] Change number of asv origin from barrnap --- assets/report_template.Rmd | 5 +++-- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 3 ++- workflows/ampliseq.nf | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 6993cc90..b332c7b1 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -44,6 +44,7 @@ params: path_rrna_bac: "" path_rrna_euk: "" path_rrna_mito: "" + path_barrnap_sum: "" ref_tax_path: "" asv_tax_path: "" --- @@ -272,9 +273,9 @@ n_rrna <- c() for (path_rrna in l_paths_rrna) { n_rrna <- append(n_rrna, length(readLines(path_rrna)) - 1) } - +barrnap_sum <- read.table(params$path_barrnap_sum) label <- c("Archea", "Bacteria", "Eukaryotes", "Mitochondria") -p_rrna <- round(n_rrna / n_asv * 100, 2) +p_rrna <- round(n_rrna / nrow(barrnap_sum) * 100, 2) barrnap_df <- data.frame(label, n_rrna, p_rrna) # Build outputtext diff --git a/bin/generate_report.R b/bin/generate_report.R index 088ed16a..3a57ddc7 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -35,6 +35,7 @@ option_list = list( make_option(c("--path_rrna_bac"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_rrna_euk"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_rrna_mito"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--path_barrnap_sum"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--ref_tax_path"), 
type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--asv_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) @@ -72,5 +73,6 @@ rmarkdown::render(opt$report, output_file = opt$output, path_rrna_bac = opt$path_rrna_bac, path_rrna_euk = opt$path_rrna_euk, path_rrna_mito = opt$path_rrna_mito, + path_barrnap_sum = opt$path_barrnap_sum, ref_tax_path = opt$ref_tax_path, asv_tax_path = opt$asv_tax_path)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 06f0de79..f74303c1 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -24,6 +24,7 @@ process SUMMARY_REPORT { path(dada_tab) path(dada_stats) path(barrnap_gff) + path(barrnap_summary) path(tax_reference) path(asv_tax) @@ -44,7 +45,7 @@ process SUMMARY_REPORT { def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" def single_end = meta.single_end ? "--single_end" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" - def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]}" + def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" def taxonomy = params.skip_taxonomy ? "--skip_taxonomy" : params.dada_ref_tax_custom ? 
"--ref_tax_user --asv_tax_path $asv_tax" : "--ref_tax_path $tax_reference --asv_tax_path $asv_tax" """ diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index d85538c7..b62c212d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -682,7 +682,7 @@ workflow AMPLISEQ { DADA2_MERGE.out.dada2asv, DADA2_MERGE.out.dada2stats, !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], - // TODO custom ref db + !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], !params.skip_taxonomy ? !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [] : [], !params.skip_taxonomy ? !params.skip_dada_addspecies ? DADA2_ADDSPECIES.out.tsv : DADA2_TAXONOMY.out.tsv : [] ) From 3b728a28e8ed5f83a038068d5e8cf4b8c5f74a95 Mon Sep 17 00:00:00 2001 From: Till Englert Date: Wed, 5 Apr 2023 11:38:34 +0200 Subject: [PATCH 011/230] Move some parameters which were below skip_dada_quality --- assets/report_template.Rmd | 10 ++++------ modules/local/summary_report.nf | 8 ++++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index b332c7b1..63e05f9f 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -122,7 +122,9 @@ cat("Further quality checks were performed using the DADA2 package and ", "found [here](", dada2_dir, ").", sep = "") ``` + ```{r, eval = !params$flag_skip_dada_quality, results='asis'} +# TODO is this really skippable by skip_dada_quality? 
if (params$trunc_qmin != -1) { f_and_tr_args <- readLines(params$dada_filtntrim_args) trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", @@ -158,7 +160,7 @@ Error correction was performed using DADA2 as well and the originalplots can be knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis'} +```{r, results='asis'} # Header cat("## ASV inference using DADA2\n") @@ -175,11 +177,7 @@ cat("The ASVs can be found [here](", params$path_asv_fa, "). And the correspondi "be found [here](", params$path_dada2_tab, ").", sep = "") ``` -```{r} -print(params$flag_single_end) -``` - -```{r, eval = !params$flag_skip_dada_quality, results='asis'} +```{r, results='asis'} # import stats tsv dada_stats <- read.table(file = params$dada_stats_path, header = TRUE, sep = "\t") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index f74303c1..09abb304 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -40,8 +40,8 @@ process SUMMARY_REPORT { def fastqc = params.skip_fastqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "--ca_sum_path $ca_summary" def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : - meta.single_end ? "--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --asv_table_path $dada_asv_table --path_asv_fa $dada_asv_fa --path_dada2_tab $dada_tab --dada_stats_path $dada_stats" : - "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats --asv_table_path $dada_asv_table --path_asv_fa $dada_asv_fa --path_dada2_tab $dada_tab --dada_stats_path $dada_stats" + meta.single_end ? 
"--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats" : + "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats" def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" def single_end = meta.single_end ? "--single_end" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" @@ -54,6 +54,10 @@ process SUMMARY_REPORT { $fastqc \\ $cutadapt \\ $dada_quality \\ + --asv_table_path $dada_asv_table \\ + --path_asv_fa $dada_asv_fa \\ + --path_dada2_tab $dada_tab \\ + --dada_stats_path $dada_stats --dada_filtntrim_args $dada_filtntrim_args \\ $dada_err \\ $barrnap \\ From 4e5d5eb58d195a81e96c406f72692e4ae8d0756d Mon Sep 17 00:00:00 2001 From: Till Englert Date: Wed, 5 Apr 2023 12:36:27 +0200 Subject: [PATCH 012/230] bugfix parameter line continue and some refactoring in report template --- assets/report_template.Rmd | 9 +++------ modules/local/summary_report.nf | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 63e05f9f..1c6df78a 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -122,7 +122,6 @@ cat("Further quality checks were performed using the DADA2 package and ", "found [here](", dada2_dir, ").", sep = "") ``` - ```{r, eval = !params$flag_skip_dada_quality, results='asis'} # TODO is this really skippable by skip_dada_quality? 
if (params$trunc_qmin != -1) { @@ -142,12 +141,12 @@ if (params$trunc_qmin != -1) { } ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold', fig.align='center'} +```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='center'} # TODO svg seems to have an error knitr::include_graphics(c(params$dada_qc_f_path,params$dada_qc_r_path)) ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis', out.width="49%", fig.show='hold', fig.align='center'} +```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='center'} # TODO same issue, also error in svg knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) ``` @@ -156,7 +155,7 @@ knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) Error correction was performed using DADA2 as well and the originalplots can be found [here](../dada2/QC/). -```{r, results = 'asis', out.width="49%", fig.show='hold', fig.align='center'} +```{r, out.width="49%", fig.show='hold', fig.align='center'} knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` @@ -175,9 +174,7 @@ cat("The ASVs can be found [here](", params$path_asv_fa, "). And the correspondi " quantification of the ASVs across samples can be found ", "[here](", params$path_asv_path, "). 
An extensive table containing both can ", "be found [here](", params$path_dada2_tab, ").", sep = "") -``` -```{r, results='asis'} # import stats tsv dada_stats <- read.table(file = params$dada_stats_path, header = TRUE, sep = "\t") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 09abb304..72510f2b 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -57,7 +57,7 @@ process SUMMARY_REPORT { --asv_table_path $dada_asv_table \\ --path_asv_fa $dada_asv_fa \\ --path_dada2_tab $dada_tab \\ - --dada_stats_path $dada_stats + --dada_stats_path $dada_stats \\ --dada_filtntrim_args $dada_filtntrim_args \\ $dada_err \\ $barrnap \\ From b193c0f2cddde67d4759662cb269c8c521916b52 Mon Sep 17 00:00:00 2001 From: Till Englert Date: Wed, 5 Apr 2023 12:44:53 +0200 Subject: [PATCH 013/230] Also skip header if no filtering with barrnap or taxonomic classification was performed --- assets/report_template.Rmd | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 1c6df78a..1eea6132 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -252,11 +252,9 @@ ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + scale_fill_brewer("Filtering Steps", palette = "Spectral") ``` - -# Filtering of ASVs - ```{r, eval = !params$flag_skip_barrnap, results='asis'} # Header +cat("# Filtering of ASVs") cat("## ASV filtering using Barrnap\n") cat("Barrnap classifies the ASVs into the origin domain (including ", "mitochondiral origin). 
Using this classification the ASVs can ", @@ -304,10 +302,9 @@ ggplot(barrnap_df, theme_bw() ``` -# Taxonomic Classification - ```{r, eval = !params$flag_skip_taxonomy, results='asis'} # Header +cat("# Taxonomic Classification") cat("## Taxonomic Classification using DADA2\n") if (!params$flag_ref_tax_user) { From f72715c869495b3c977608e7ad59379e37cf9f8b Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Apr 2023 13:53:07 +0200 Subject: [PATCH 014/230] fix test_single --- assets/report_template.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 1eea6132..d9015a27 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -191,8 +191,8 @@ datatable(dada_stats, options = list( if ( params$flag_single_end ) { # single end dada_stats_ex <- data.frame(sample = dada_stats$sample, - input = dada_stats$input, - filtered = dada_stats$input-dada_stats$filtered, + input = dada_stats$DADA2_input, + filtered = dada_stats$DADA2_input-dada_stats$filtered, denoised = dada_stats$filtered-dada_stats$denoised, nonchim = dada_stats$denoised-dada_stats$nonchim, analysis = dada_stats$nonchim) From a42b9f1fab652becb407eee7b03798c5348c1b21 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Apr 2023 14:45:05 +0200 Subject: [PATCH 015/230] fix dada2 quality plots --- assets/report_template.Rmd | 8 +++----- modules/local/summary_report.nf | 10 ++++------ subworkflows/local/dada2_preprocessing.nf | 4 ++-- workflows/ampliseq.nf | 6 ++---- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index d9015a27..0bd97a5b 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -141,13 +141,11 @@ if (params$trunc_qmin != -1) { } ``` -```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='center'} -# TODO svg seems to have an error 
-knitr::include_graphics(c(params$dada_qc_f_path,params$dada_qc_r_path)) +```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='default'} +knitr::include_graphics(c(params$dada_qc_f_path, params$dada_qc_r_path)) ``` -```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='center'} -# TODO same issue, also error in svg +```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='default'} knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) ``` diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 72510f2b..79e0f00b 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -14,10 +14,8 @@ process SUMMARY_REPORT { path(mqc_plots) path(ca_summary) path(dada_filtntrim_args) - path(dada_fw_qual_stats) - path(dada_rv_qual_stats) - path(dada_pp_fw_qual_stats) - path(dada_pp_rv_qual_stats) + path(dada_qual_stats) + path(dada_pp_qual_stats) tuple val(meta), path(dada_err_svgs) path(dada_asv_table) path(dada_asv_fa) @@ -40,8 +38,8 @@ process SUMMARY_REPORT { def fastqc = params.skip_fastqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "--ca_sum_path $ca_summary" def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : - meta.single_end ? "--dada_qc_f_path $dada_fw_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats" : - "--dada_qc_f_path $dada_fw_qual_stats --dada_qc_r_path $dada_rv_qual_stats --dada_pp_qc_f_path $dada_pp_fw_qual_stats --dada_pp_qc_r_path $dada_pp_rv_qual_stats" + meta.single_end ? 
"--dada_qc_f_path $dada_qual_stats --dada_pp_qc_f_path $dada_pp_qual_stats" : + "--dada_qc_f_path ${dada_qual_stats[0]} --dada_qc_r_path ${dada_qual_stats[1]} --dada_pp_qc_f_path ${dada_pp_qual_stats[0]} --dada_pp_qc_r_path ${dada_pp_qual_stats[1]}" def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" def single_end = meta.single_end ? "--single_end" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" diff --git a/subworkflows/local/dada2_preprocessing.nf b/subworkflows/local/dada2_preprocessing.nf index 181e9c56..86801174 100644 --- a/subworkflows/local/dada2_preprocessing.nf +++ b/subworkflows/local/dada2_preprocessing.nf @@ -126,7 +126,7 @@ workflow DADA2_PREPROCESSING { reads = ch_filt_reads logs = DADA2_FILTNTRIM.out.log args = DADA2_FILTNTRIM.out.args - qc_svg = ch_DADA2_QUALITY1_SVG - qc_svg_preprocessed = ch_DADA2_QUALITY2_SVG + qc_svg = ch_DADA2_QUALITY1_SVG.collect(sort:true) + qc_svg_preprocessed = ch_DADA2_QUALITY2_SVG.collect(sort:true) versions = ch_versions_dada2_preprocessing } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index cef04126..16fe8c7f 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -709,10 +709,8 @@ workflow AMPLISEQ { MULTIQC.out.plots, //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect() : [], DADA2_PREPROCESSING.out.args.first(), - !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_qual_stats.svg") : [], - !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_qual_stats.svg") : [], - !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "FW_preprocessed_qual_stats.svg") : [], - !params.skip_dada_quality ? 
DADA2_PREPROCESSING.out.qc_svg.collectFile(name: "RV_preprocessed_qual_stats.svg") : [], + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg : [], + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg_preprocessed : [], DADA2_ERR.out.svg, DADA2_MERGE.out.asv, DADA2_MERGE.out.fasta, From 44eead4c87c9ffb8cdbaacf2ffb87a7f137825ec Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Apr 2023 14:47:17 +0200 Subject: [PATCH 016/230] fix dada2 error plots side-by-side --- assets/report_template.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 0bd97a5b..1f8a0012 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -153,7 +153,7 @@ knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) Error correction was performed using DADA2 as well and the originalplots can be found [here](../dada2/QC/). -```{r, out.width="49%", fig.show='hold', fig.align='center'} +```{r, out.width="49%", fig.show='hold', fig.align='default'} knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` From 64a76cadfc930fe892cab656af0f4d3d93509a76 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Apr 2023 15:18:53 +0200 Subject: [PATCH 017/230] fix links to ASV files by DADA2 --- assets/report_template.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 1f8a0012..9eade52b 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -168,10 +168,10 @@ n_asv <- length(asv_table$ASV_ID) # Output text cat(n_asv, "amplicon sequence variants (ASVs) were obtained across all samples. ") -cat("The ASVs can be found [here](", params$path_asv_fa, "). And the corresponding", - " quantification of the ASVs across samples can be found ", - "[here](", params$path_asv_path, "). 
An extensive table containing both can ", - "be found [here](", params$path_dada2_tab, ").", sep = "") +cat("The ASVs can be found in ['ASV_seqs.fasta'](../dada2/). And the corresponding", + " quantification of the ASVs across samples can be found in", + "['ASV_table.tsv'](../dada2/). An extensive table containing both can ", + "be found ['DADA2_table.tsv'](../dada2/)") # import stats tsv dada_stats <- read.table(file = params$dada_stats_path, header = TRUE, sep = "\t") From 82973c9028f0703df649bddd999a32e6019959e6 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Apr 2023 15:41:16 +0200 Subject: [PATCH 018/230] fix headlines --- assets/report_template.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 9eade52b..103d2d1e 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -252,7 +252,7 @@ ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + ```{r, eval = !params$flag_skip_barrnap, results='asis'} # Header -cat("# Filtering of ASVs") +cat("# Filtering of ASVs\n") cat("## ASV filtering using Barrnap\n") cat("Barrnap classifies the ASVs into the origin domain (including ", "mitochondiral origin). 
Using this classification the ASVs can ", @@ -302,7 +302,7 @@ ggplot(barrnap_df, ```{r, eval = !params$flag_skip_taxonomy, results='asis'} # Header -cat("# Taxonomic Classification") +cat("# Taxonomic Classification\n") cat("## Taxonomic Classification using DADA2\n") if (!params$flag_ref_tax_user) { From 9da2f54020d5f9111f36e7c07768c057958bb502 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Apr 2023 17:24:00 +0200 Subject: [PATCH 019/230] improve dada2 display of quality plots --- assets/report_template.Rmd | 63 ++++++++++++++++++++++----------- modules/local/summary_report.nf | 14 ++++---- workflows/ampliseq.nf | 1 + 3 files changed, 52 insertions(+), 26 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 103d2d1e..86361784 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -113,31 +113,38 @@ ggplot(cutadapt_summary, theme_bw() ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis'} -dada2_dir <- paste0("../dada2/QC/") +```{r, results='asis'} +#TODO: params.max_ee should be also reported, see https://nf-co.re/ampliseq/2.5.0/parameters#max_ee cat("## QC using DADA2\n") -cat("Further quality checks were performed using the DADA2 package and ", - "forward and reverse quality stats are displayed as well as the ", - "respective preprocessed quality stats. 
The original plots can be ", - "found [here](", dada2_dir, ").", sep = "") +if (params$trunc_qmin != -1) { + f_and_tr_args <- readLines(params$dada_filtntrim_args) + trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", + f_and_tr_args), ", ") + tr_len_f <- trunc_len[[1]][1] + tr_len_r <- trunc_len[[1]][2] + cat("Reads were trimmed before median quality drops ", + "below ", params$trunc_qmin, " resulting in a trim of ", + "forward reads at ", tr_len_f, " bp and reverse ", + "reads at ", tr_len_r, " bp.", sep = "") +} else if (params$trunclenf == "null" && params$trunclenr == "null") { + cat("Reads were not trimmed.") +} else if (params$trunclenf != 0 && params$trunclenr != 0) { + cat("Forward reads were trimmed at ", params$trunclenf, + " bp and reverse reads were trimmed at ", params$trunclenr, + " bp.", sep = "") +} else if (params$trunclenf != 0) { + cat("Forward reads were trimmed at ", params$trunclenf," bp.", sep = "") +} else if (params$trunclenr != 0) { + cat("Reverse reads were trimmed at ", params$trunclenr," bp.", sep = "") +} ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} -# TODO is this really skippable by skip_dada_quality? -if (params$trunc_qmin != -1) { - f_and_tr_args <- readLines(params$dada_filtntrim_args) - trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", - f_and_tr_args), ", ") - tr_len_f <- trunc_len[[1]][1] - tr_len_r <- trunc_len[[1]][2] - no_trunclen <- cat("Reads were trimmed before median quality drops ", - "below ", params$flag_trunc_qmin, " resulting in a trim of ", - "forward reads at ", tr_len_f, " bp and reverse ", - "reads at ", tr_len_r, " bp.", sep = "") +#TODO: are forward reads indeed always left? 
+if (params$flag_single_end) { + cat("Read quality stats for incoming data:") } else { - trunclen <- cat("Forward reads were trimmed at ", params$trunclenf, - " bp and reverse reads were trimmed at ", params$trunclenr, - " bp.", sep = "") + cat("Forward (left) and reverse (right) read quality stats for incoming data:") } ``` @@ -145,10 +152,23 @@ if (params$trunc_qmin != -1) { knitr::include_graphics(c(params$dada_qc_f_path, params$dada_qc_r_path)) ``` +```{r, eval = !params$flag_skip_dada_quality, results='asis'} +#TODO: for "-profile test" the foward reads seem to be on the right side!! easy to see when looking at the length. Even when in "dada2_preprocessing.nf" is stated: qc_svg = ch_DADA2_QUALITY1_SVG.collect(sort:true) +if (params$flag_single_end) { + cat("Read quality stats for preprocessed data:") +} else { + cat("Forward (left) and reverse (right) read quality stats for preprocessed data:") +} +``` + ```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='default'} knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) ``` +```{r, eval = !params$flag_skip_dada_quality, results='asis'} +cat("Original plots can be found [here](../dada2/QC/).") +``` + ## Error correction using DADA2 Error correction was performed using DADA2 as well and the originalplots can be found [here](../dada2/QC/). @@ -259,6 +279,9 @@ cat("Barrnap classifies the ASVs into the origin domain (including ", "be filtered by sample origin.\n\n", sep = "") # Read the barrnap files and count the lines +# TODO: rather use "results/barrnap/summary.tsv", that includes all info +# TODO: use lowest p-value for ASVs, i.e. use https://github.com/nf-core/ampliseq/blob/78b7514ceeba80efb66b0e973e5321878cb9b0ba/modules/local/filter_ssu.nf#L38-L41 +# TODO: "results/barrnap/summary.tsv" contains only ASVs with annotations, i.e. sequences that do not match are not listed, so use previous ASV count for making relative values! 
l_paths_rrna <- c(params$path_rrna_arc, params$path_rrna_bac, params$path_rrna_euk, params$path_rrna_mito) n_rrna <- c() for (path_rrna in l_paths_rrna) { diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 79e0f00b..5d3b027b 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -13,6 +13,7 @@ process SUMMARY_REPORT { path(report_styles) path(mqc_plots) path(ca_summary) + val(find_truncation_values) path(dada_filtntrim_args) path(dada_qual_stats) path(dada_pp_qual_stats) @@ -35,13 +36,15 @@ process SUMMARY_REPORT { task.ext.when == null || task.ext.when script: + def single_end = meta.single_end ? "--single_end" : "" def fastqc = params.skip_fastqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" - def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : "--ca_sum_path $ca_summary" + def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : + params.retain_untrimmed ? "--retain_untrimmed --ca_sum_path $ca_summary" : + "--ca_sum_path $ca_summary" def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : meta.single_end ? "--dada_qc_f_path $dada_qual_stats --dada_pp_qc_f_path $dada_pp_qual_stats" : "--dada_qc_f_path ${dada_qual_stats[0]} --dada_qc_r_path ${dada_qual_stats[1]} --dada_pp_qc_f_path ${dada_pp_qual_stats[0]} --dada_pp_qc_r_path ${dada_pp_qual_stats[1]}" - def retain_untrimmed = params.retain_untrimmed ? "--retain_untrimmed" : "" - def single_end = meta.single_end ? "--single_end" : "" + def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? 
"--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" def taxonomy = params.skip_taxonomy ? "--skip_taxonomy" : @@ -60,11 +63,10 @@ process SUMMARY_REPORT { $dada_err \\ $barrnap \\ $taxonomy \\ - $retain_untrimmed \\ $single_end \\ + $find_truncation \\ --trunclenf $params.trunclenf \\ - --trunclenr $params.trunclenr \\ - --trunc_qmin $params.trunc_qmin + --trunclenr $params.trunclenr """ //--pl_results $results_dir \\ //cat <<-END_VERSIONS > versions.yml diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 16fe8c7f..02f66941 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -708,6 +708,7 @@ workflow AMPLISEQ { Channel.fromPath("${baseDir}/assets/report_styles.css"), MULTIQC.out.plots, //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect() : [], + find_truncation_values, DADA2_PREPROCESSING.out.args.first(), !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg : [], !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg_preprocessed : [], From 5dd6480ae7d0191244b69298de8607165ad6c034 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 11 Apr 2023 16:33:29 +0200 Subject: [PATCH 020/230] fix sequence of dada2 quality plots --- assets/report_template.Rmd | 1 - modules/local/summary_report.nf | 3 ++- subworkflows/local/dada2_preprocessing.nf | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 86361784..f7259939 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -153,7 +153,6 @@ knitr::include_graphics(c(params$dada_qc_f_path, params$dada_qc_r_path)) ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} -#TODO: for "-profile test" the foward reads seem to be on the right side!! 
easy to see when looking at the length. Even when in "dada2_preprocessing.nf" is stated: qc_svg = ch_DADA2_QUALITY1_SVG.collect(sort:true) if (params$flag_single_end) { cat("Read quality stats for preprocessed data:") } else { diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 5d3b027b..20a703bb 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -41,9 +41,10 @@ process SUMMARY_REPORT { def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : params.retain_untrimmed ? "--retain_untrimmed --ca_sum_path $ca_summary" : "--ca_sum_path $ca_summary" + // Even when in "dada2_preprocessing.nf" is stated "qc_svg = ch_DADA2_QUALITY1_SVG.collect(sort:true)" the whole path, not only the file name, is used to sort. So FW cannot be guaranteed to be before RV! def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : meta.single_end ? "--dada_qc_f_path $dada_qual_stats --dada_pp_qc_f_path $dada_pp_qual_stats" : - "--dada_qc_f_path ${dada_qual_stats[0]} --dada_qc_r_path ${dada_qual_stats[1]} --dada_pp_qc_f_path ${dada_pp_qual_stats[0]} --dada_pp_qc_r_path ${dada_pp_qual_stats[1]}" + "--dada_qc_f_path 'FW_qual_stats.svg' --dada_qc_r_path 'RV_qual_stats.svg' --dada_pp_qc_f_path 'FW_preprocessed_qual_stats.svg' --dada_pp_qc_r_path 'RV_preprocessed_qual_stats.svg'" def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? 
"--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" diff --git a/subworkflows/local/dada2_preprocessing.nf b/subworkflows/local/dada2_preprocessing.nf index 86801174..12412c4a 100644 --- a/subworkflows/local/dada2_preprocessing.nf +++ b/subworkflows/local/dada2_preprocessing.nf @@ -126,7 +126,7 @@ workflow DADA2_PREPROCESSING { reads = ch_filt_reads logs = DADA2_FILTNTRIM.out.log args = DADA2_FILTNTRIM.out.args - qc_svg = ch_DADA2_QUALITY1_SVG.collect(sort:true) - qc_svg_preprocessed = ch_DADA2_QUALITY2_SVG.collect(sort:true) + qc_svg = ch_DADA2_QUALITY1_SVG.collect() + qc_svg_preprocessed = ch_DADA2_QUALITY2_SVG.collect() versions = ch_versions_dada2_preprocessing } From d4d76d625f5c57356657722fef2676c00e1ada37 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 11 Apr 2023 17:01:41 +0200 Subject: [PATCH 021/230] add --trunc_rmin and --max_ee --- assets/report_template.Rmd | 15 +++++++++------ bin/generate_report.R | 4 ++++ modules/local/summary_report.nf | 5 +++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index f7259939..37389439 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -24,7 +24,9 @@ params: flag_single_end: FALSE trunclenf: "" trunclenr: "" + max_ee: "" trunc_qmin: "" + trunc_rmin: "" # file paths mqc_plot: "" @@ -123,24 +125,25 @@ if (params$trunc_qmin != -1) { tr_len_f <- trunc_len[[1]][1] tr_len_r <- trunc_len[[1]][2] cat("Reads were trimmed before median quality drops ", - "below ", params$trunc_qmin, " resulting in a trim of ", + "below ", params$trunc_qmin, " and at least ",params$trunc_rmin*100, + "% of reads are retained, resulting in a trim of ", "forward reads at ", tr_len_f, " bp and reverse ", - "reads at ", tr_len_r, " bp.", sep = "") + "reads at ", tr_len_r, " bp. 
", sep = "") } else if (params$trunclenf == "null" && params$trunclenr == "null") { cat("Reads were not trimmed.") } else if (params$trunclenf != 0 && params$trunclenr != 0) { cat("Forward reads were trimmed at ", params$trunclenf, " bp and reverse reads were trimmed at ", params$trunclenr, - " bp.", sep = "") + " bp. ", sep = "") } else if (params$trunclenf != 0) { - cat("Forward reads were trimmed at ", params$trunclenf," bp.", sep = "") + cat("Forward reads were trimmed at ", params$trunclenf," bp. ", sep = "") } else if (params$trunclenr != 0) { - cat("Reverse reads were trimmed at ", params$trunclenr," bp.", sep = "") + cat("Reverse reads were trimmed at ", params$trunclenr," bp. ", sep = "") } +cat("Reads with more than", params$max_ee,"expected errors were discarded.", sep = " ") ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} -#TODO: are forward reads indeed always left? if (params$flag_single_end) { cat("Read quality stats for incoming data:") } else { diff --git a/bin/generate_report.R b/bin/generate_report.R index 3a57ddc7..07ac5d5f 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -17,7 +17,9 @@ option_list = list( make_option(c("--single_end"), action="store_true", default=FALSE, help="Flag if single end data is used", metavar="logical"), make_option(c("--trunclenf"), type="numeric", default=-1, help="Parameter to define truncation in forward strand", metavar="numeric"), make_option(c("--trunclenr"), type="numeric", default=-1, help="Parameter to define truncation in reverse strand", metavar="numeric"), + make_option(c("--max_ee"), type="numeric", default=-1, help="Parameter to filter reads based on expected errors", metavar="numeric"), make_option(c("--trunc_qmin"), type="numeric", default=-1, help="Parameter to define truncation via quality measure. 
Set to -1 if trunclen were given.", metavar="numeric"), + make_option(c("--trunc_rmin"), type="numeric", default=-1, help="Parameter to define truncation via read retaining ratio. Set to -1 if trunclen were given.", metavar="numeric"), make_option(c("--mqc_plot"), type="character", default=NULL, help="MultiQC plot per sequence quality", metavar="character"), make_option(c("--ca_sum_path"), type="character", default=NULL, help="cutadapt summary table", metavar="character"), make_option(c("--dada_filtntrim_args"), type="character", default=NULL, help="DADA2 arguments for filter and trim process", metavar="character"), @@ -55,7 +57,9 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_single_end = opt$single_end, trunclenf = opt$trunclenf, trunclenr = opt$trunclenr, + max_ee = opt$max_ee, trunc_qmin = opt$trunc_qmin, + trunc_rmin = opt$trunc_rmin, mqc_plot = opt$mqc_plot, ca_sum_path = opt$ca_sum_path, dada_filtntrim_args = opt$dada_filtntrim_args, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 20a703bb..0933a4c4 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -45,7 +45,7 @@ process SUMMARY_REPORT { def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : meta.single_end ? "--dada_qc_f_path $dada_qual_stats --dada_pp_qc_f_path $dada_pp_qual_stats" : "--dada_qc_f_path 'FW_qual_stats.svg' --dada_qc_r_path 'RV_qual_stats.svg' --dada_pp_qc_f_path 'FW_preprocessed_qual_stats.svg' --dada_pp_qc_r_path 'RV_preprocessed_qual_stats.svg'" - def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin" : "" + def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin --trunc_rmin $params.trunc_rmin" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? 
"--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" def taxonomy = params.skip_taxonomy ? "--skip_taxonomy" : @@ -67,7 +67,8 @@ process SUMMARY_REPORT { $single_end \\ $find_truncation \\ --trunclenf $params.trunclenf \\ - --trunclenr $params.trunclenr + --trunclenr $params.trunclenr \\ + --max_ee $params.max_ee """ //--pl_results $results_dir \\ //cat <<-END_VERSIONS > versions.yml From c8296b9d1422570da77450e33f8f33c36aa4c955 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 11 Apr 2023 17:07:30 +0200 Subject: [PATCH 022/230] rm TODO --- assets/report_template.Rmd | 1 - 1 file changed, 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 37389439..c5a58fd2 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -116,7 +116,6 @@ ggplot(cutadapt_summary, ``` ```{r, results='asis'} -#TODO: params.max_ee should be also reported, see https://nf-co.re/ampliseq/2.5.0/parameters#max_ee cat("## QC using DADA2\n") if (params$trunc_qmin != -1) { f_and_tr_args <- readLines(params$dada_filtntrim_args) From c052f196a4188b6f26530389156093f8302392bd Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 19 Jun 2023 14:41:41 +0200 Subject: [PATCH 023/230] fix container link --- modules/local/summary_report.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 0933a4c4..8127daf5 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -2,7 +2,7 @@ process SUMMARY_REPORT { label 'process_low' - container 'tillenglert/ampliseq_report:latest' + container 'docker.io/tillenglert/ampliseq_report:latest' //conda (params.enable_conda ? "bioconda:r-markdown==0.8--r3.4.1_1" : null) //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
// 'https://depot.galaxyproject.org/singularity/r-markdown:0.8--r3.4.1_1' : From b4a1bd943c057da7016e98d7074ef77207749f13 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 19 Jun 2023 16:08:09 +0200 Subject: [PATCH 024/230] fix input and add taxonomies --- assets/report_template.Rmd | 12 ++++++++---- bin/generate_report.R | 14 ++++++++++---- modules/local/summary_report.nf | 21 +++++++++++++++------ workflows/ampliseq.nf | 7 +++++-- 4 files changed, 38 insertions(+), 16 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index c5a58fd2..410ce599 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -18,7 +18,7 @@ params: flag_skip_cutadapt: FALSE flag_skip_dada_quality: FALSE flag_skip_barrnap: FALSE - flag_skip_taxonomy: FALSE + #flag_skip_taxonomy: FALSE flag_retain_untrimmed: TRUE flag_ref_tax_user: FALSE flag_single_end: FALSE @@ -48,7 +48,11 @@ params: path_rrna_mito: "" path_barrnap_sum: "" ref_tax_path: "" - asv_tax_path: "" + dada2_taxonomy: "" + sintax_taxonomy: "" + pplace_taxonomy: "" + qiime2_taxonomy: "" + --- ```{r setup, include=FALSE} @@ -324,7 +328,7 @@ ggplot(barrnap_df, theme_bw() ``` -```{r, eval = !params$flag_skip_taxonomy, results='asis'} +```{r, eval = (params$dada2_taxonomy != ""), results='asis'} # Header cat("# Taxonomic Classification\n") cat("## Taxonomic Classification using DADA2\n") @@ -348,7 +352,7 @@ if (!params$flag_ref_tax_user) { "provided by the user.\n\n", sep = "") } -asv_tax <- read.table(params$asv_tax_path, header = TRUE, sep = "\t") +asv_tax <- read.table(params$dada2_taxonomy, header = TRUE, sep = "\t") # Calculate the classified numbers/percent of asv level <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus") diff --git a/bin/generate_report.R b/bin/generate_report.R index 07ac5d5f..1a11df8e 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -11,7 +11,7 @@ option_list = list( make_option(c("--skip_cutadapt"), action="store_true", default=FALSE, 
help="Trigger to skip cutadapt filtering", metavar="logical"), make_option(c("--skip_dada_quality"), action="store_true", default=FALSE, help="Trigger to skip dada2 quality plotting", metavar="logical"), make_option(c("--skip_barrnap"), action="store_true", default=FALSE, help="Trigger to skip barrnap ASV filtering", metavar="logical"), - make_option(c("--skip_taxonomy"), action="store_true", default=FALSE, help="Trigger to skip taxonomic classification", metavar="logical"), + #make_option(c("--skip_taxonomy"), action="store_true", default=FALSE, help="Trigger to skip taxonomic classification", metavar="logical"), make_option(c("--retain_untrimmed"), action="store_true", default=FALSE, help="Flag to retain the untrimmed sequences", metavar="logical"), make_option(c("--ref_tax_user"), action="store_true", default=FALSE, help="Flag that user provided custom db", metavar="logical"), make_option(c("--single_end"), action="store_true", default=FALSE, help="Flag if single end data is used", metavar="logical"), @@ -39,7 +39,10 @@ option_list = list( make_option(c("--path_rrna_mito"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_barrnap_sum"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--ref_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--asv_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character") + make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) opt_parser = OptionParser(option_list = 
option_list) @@ -51,7 +54,7 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_skip_cutadapt = opt$skip_cutadapt, flag_skip_dada_quality = opt$skip_dada_quality, flag_skip_barrnap = opt$skip_barrnap, - flag_skip_taxonomy = opt$skip_taxonomy, + #flag_skip_taxonomy = opt$skip_taxonomy, flag_retain_untrimmed = opt$retain_untrimmed, flag_ref_tax_user = opt$ref_tax_user, flag_single_end = opt$single_end, @@ -79,4 +82,7 @@ rmarkdown::render(opt$report, output_file = opt$output, path_rrna_mito = opt$path_rrna_mito, path_barrnap_sum = opt$path_barrnap_sum, ref_tax_path = opt$ref_tax_path, - asv_tax_path = opt$asv_tax_path)) + dada2_taxonomy = opt$dada2_taxonomy, + sintax_taxonomy = opt$sintax_taxonomy, + pplace_taxonomy = opt$pplace_taxonomy, + qiime2_taxonomy = opt$qiime2_taxonomy)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 8127daf5..921aaf0c 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -24,8 +24,11 @@ process SUMMARY_REPORT { path(dada_stats) path(barrnap_gff) path(barrnap_summary) - path(tax_reference) - path(asv_tax) + path(dada2_tax_reference) + path(dada2_tax) + path(sintax_tax) + path(pplace_tax) + path(qiime2_tax) output: @@ -48,8 +51,11 @@ process SUMMARY_REPORT { def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin --trunc_rmin $params.trunc_rmin" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" - def taxonomy = params.skip_taxonomy ? "--skip_taxonomy" : - params.dada_ref_tax_custom ? 
"--ref_tax_user --asv_tax_path $asv_tax" : "--ref_tax_path $tax_reference --asv_tax_path $asv_tax" + def dada2_taxonomy = dada2_tax ? "--dada2_taxonomy $dada2_tax" : "" + dada2_taxonomy += params.dada_ref_tax_custom ? " --ref_tax_user" : " --ref_tax_path $dada2_tax_reference" + def sintax_taxonomy = sintax_tax ? "--sintax_taxonomy $sintax_tax" : "" + def pplace_taxonomy = pplace_tax ? "--pplace_taxonomy $pplace_tax" : "" + def qiime2_taxonomy = qiime2_tax ? "--qiime2_taxonomy $qiime2_tax" : "" """ generate_report.R --report $report_template \\ --output "Summary_Report.html" \\ @@ -63,12 +69,15 @@ process SUMMARY_REPORT { --dada_filtntrim_args $dada_filtntrim_args \\ $dada_err \\ $barrnap \\ - $taxonomy \\ $single_end \\ $find_truncation \\ --trunclenf $params.trunclenf \\ --trunclenr $params.trunclenr \\ - --max_ee $params.max_ee + --max_ee $params.max_ee \\ + $dada2_taxonomy \\ + $sintax_taxonomy \\ + $pplace_taxonomy \\ + $qiime2_taxonomy """ //--pl_results $results_dir \\ //cat <<-END_VERSIONS > versions.yml diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index d6ebd30e..3e1ee520 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -682,8 +682,11 @@ workflow AMPLISEQ { DADA2_MERGE.out.dada2stats, !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], - !params.skip_taxonomy ? !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [] : [], - !params.skip_taxonomy ? !params.skip_dada_addspecies ? DADA2_ADDSPECIES.out.tsv : DADA2_TAXONOMY.out.tsv : [] + !params.skip_taxonomy && !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [], + !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], + !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax : [], + !params.skip_taxonomy && params.pplace_tree ? 
ch_pplace_tax : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [] ) From 0eaa9660cfc8168dacc428c6a1c3d1bbf75b5fbc Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 19 Jun 2023 16:13:24 +0200 Subject: [PATCH 025/230] fix test_sintax --- assets/report_template.Rmd | 1 + workflows/ampliseq.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 410ce599..d6783634 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -354,6 +354,7 @@ if (!params$flag_ref_tax_user) { asv_tax <- read.table(params$dada2_taxonomy, header = TRUE, sep = "\t") +#TODO: this depends on the database used and is defined in ref_databases.config in "taxlevels" # Calculate the classified numbers/percent of asv level <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus") diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3e1ee520..dd74cfe7 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -682,7 +682,7 @@ workflow AMPLISEQ { DADA2_MERGE.out.dada2stats, !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], - !params.skip_taxonomy && !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [], + !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy && !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [], !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax : [], !params.skip_taxonomy && params.pplace_tree ? 
ch_pplace_tax : [], From 03b2e0380b525e2b5c6bfecb521967e5828871fc Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 19 Jun 2023 16:26:11 +0200 Subject: [PATCH 026/230] fix test_novaseq --- modules/local/summary_report.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 921aaf0c..95c5fe3a 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -51,8 +51,8 @@ process SUMMARY_REPORT { def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin --trunc_rmin $params.trunc_rmin" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" - def dada2_taxonomy = dada2_tax ? "--dada2_taxonomy $dada2_tax" : "" - dada2_taxonomy += params.dada_ref_tax_custom ? " --ref_tax_user" : " --ref_tax_path $dada2_tax_reference" + def dada2_taxonomy = !dada2_tax ? "" : + params.dada_ref_tax_custom ? "--dada2_taxonomy $dada2_tax --ref_tax_user" : "--dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" def sintax_taxonomy = sintax_tax ? "--sintax_taxonomy $sintax_tax" : "" def pplace_taxonomy = pplace_tax ? "--pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? 
"--qiime2_taxonomy $qiime2_tax" : "" From d7c61335aaf2e1e39a04a78a0309d7d2c6f8f35d Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 19 Jun 2023 17:25:11 +0200 Subject: [PATCH 027/230] improve barrnap report --- assets/report_template.Rmd | 61 ++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index d6783634..42026dcb 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -284,43 +284,40 @@ cat("Barrnap classifies the ASVs into the origin domain (including ", "be filtered by sample origin.\n\n", sep = "") # Read the barrnap files and count the lines -# TODO: rather use "results/barrnap/summary.tsv", that includes all info -# TODO: use lowest p-value for ASVs, i.e. use https://github.com/nf-core/ampliseq/blob/78b7514ceeba80efb66b0e973e5321878cb9b0ba/modules/local/filter_ssu.nf#L38-L41 -# TODO: "results/barrnap/summary.tsv" contains only ASVs with annotations, i.e. sequences that do not match are not listed, so use previous ASV count for making relative values! 
-l_paths_rrna <- c(params$path_rrna_arc, params$path_rrna_bac, params$path_rrna_euk, params$path_rrna_mito) -n_rrna <- c() -for (path_rrna in l_paths_rrna) { - n_rrna <- append(n_rrna, length(readLines(path_rrna)) - 1) -} -barrnap_sum <- read.table(params$path_barrnap_sum) -label <- c("Archea", "Bacteria", "Eukaryotes", "Mitochondria") -p_rrna <- round(n_rrna / nrow(barrnap_sum) * 100, 2) -barrnap_df <- data.frame(label, n_rrna, p_rrna) - -# Build outputtext -outputstr <- "Barrnap classified " +df = read.table( params$path_barrnap_sum, header = TRUE, sep = "\t", stringsAsFactors = FALSE) +# keep only ASV_ID & eval columns & sort +df <- subset(df, select = c(ASV_ID,mito_eval,euk_eval,arc_eval,bac_eval)) +# choose kingdom (column) with lowest evalue +df[is.na(df)] <- 1 +df$result = colnames(df[,2:5])[apply(df[,2:5],1,which.min)] +df$result = gsub("_eval", "", df$result) -for (row in seq_len(nrow(barrnap_df))) { - outputstr <- paste0(outputstr, barrnap_df[row, ]$n_rrna, " (", - barrnap_df[row, ]$p_rrna, " %) ASVs similar to ", - barrnap_df[row, ]$label) - switch(as.character(row), - "3" = outputstr <- paste0(outputstr, " and "), - "4" = outputstr <- paste0(outputstr, ", respectively.\n\n"), - outputstr <- paste0(outputstr, ", ")) -} +#import asv table +asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t") +n_asv <- length(asv_table$ASV_ID) -# Output text -cat(outputstr) +# calculate numbers +n_classified <- length(df$result) +n_bac <- sum(grepl("bac", df$result)) +n_arc <- sum(grepl("arc", df$result)) +n_mito <- sum(grepl("mito", df$result)) +n_euk <- sum(grepl("euk", df$result)) -# Barplot +df_sum <- data.frame(label=c('Bacteria','Archea','Mitochondria','Eukaryotes','Unclassified'), + count=c(n_bac,n_arc,n_mito,n_euk,n_asv - n_classified), + percent=c(round( (n_bac/n_asv)*100, 2), round( (n_arc/n_asv)*100, 2), round( (n_mito/n_asv)*100, 2), round( (n_euk/n_asv)*100, 2), round( ( (n_asv - n_classified) /n_asv)*100, 2) ) ) -# Fix order of 
bars -barrnap_df$label <- factor(barrnap_df$label, levels = barrnap_df$label) +# Build outputtext +cat( "Barrnap classified ") +cat( df_sum$count[1], "(", df_sum$percent[1],"%) ASVs as most similar to Bacteria, " ) +cat( df_sum$count[2], "(", df_sum$percent[2],"%) ASVs to Archea, " ) +cat( df_sum$count[3], "(", df_sum$percent[3],"%) ASVs to Mitochondria, " ) +cat( df_sum$count[4], "(", df_sum$percent[4],"%) ASVs to Eukaryotes, and" ) +cat( df_sum$count[5], "(", df_sum$percent[5],"%) were below similarity threshold to any kingdom." ) -# Plot -ggplot(barrnap_df, - aes(x = reorder(label, desc(label)), y = p_rrna)) + +# Barplot +ggplot(df_sum, + aes(x = reorder(label, desc(label)), y = percent)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + xlab("rRNA origins") + From 005f363bc1388e962a76b71ac4a349092e1f6630 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 19 Jun 2023 17:53:41 +0200 Subject: [PATCH 028/230] add flag that dada2 taxonomy is available --- assets/report_template.Rmd | 5 +++-- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 42026dcb..5c807b2f 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -22,6 +22,7 @@ params: flag_retain_untrimmed: TRUE flag_ref_tax_user: FALSE flag_single_end: FALSE + flag_dada2_taxonomy: FALSE trunclenf: "" trunclenr: "" max_ee: "" @@ -48,7 +49,7 @@ params: path_rrna_mito: "" path_barrnap_sum: "" ref_tax_path: "" - dada2_taxonomy: "" + dada2_taxonomy: "empty" sintax_taxonomy: "" pplace_taxonomy: "" qiime2_taxonomy: "" @@ -325,7 +326,7 @@ ggplot(df_sum, theme_bw() ``` -```{r, eval = (params$dada2_taxonomy != ""), results='asis'} +```{r, eval = params$flag_dada2_taxonomy, results='asis'} # Header cat("# Taxonomic Classification\n") cat("## Taxonomic Classification using DADA2\n") diff --git a/bin/generate_report.R 
b/bin/generate_report.R index 1a11df8e..1385a436 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -40,6 +40,7 @@ option_list = list( make_option(c("--path_barrnap_sum"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--ref_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character") @@ -83,6 +84,7 @@ rmarkdown::render(opt$report, output_file = opt$output, path_barrnap_sum = opt$path_barrnap_sum, ref_tax_path = opt$ref_tax_path, dada2_taxonomy = opt$dada2_taxonomy, + flag_dada2_taxonomy = opt$flag_dada2_taxonomy, sintax_taxonomy = opt$sintax_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, qiime2_taxonomy = opt$qiime2_taxonomy)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 95c5fe3a..1cdad23f 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -52,7 +52,7 @@ process SUMMARY_REPORT { def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" def dada2_taxonomy = !dada2_tax ? "" : - params.dada_ref_tax_custom ? 
"--dada2_taxonomy $dada2_tax --ref_tax_user" : "--dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" + params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" def sintax_taxonomy = sintax_tax ? "--sintax_taxonomy $sintax_tax" : "" def pplace_taxonomy = pplace_tax ? "--pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? "--qiime2_taxonomy $qiime2_tax" : "" From dfba7268b54a6cbbacfba554c2a3461d554d11d4 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 20 Jun 2023 14:43:53 +0200 Subject: [PATCH 029/230] make taxonomic assignment reporting independent of taxonomic ranks --- assets/report_template.Rmd | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 5c807b2f..b70eb5a3 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -352,23 +352,24 @@ if (!params$flag_ref_tax_user) { asv_tax <- read.table(params$dada2_taxonomy, header = TRUE, sep = "\t") -#TODO: this depends on the database used and is defined in ref_databases.config in "taxlevels" # Calculate the classified numbers/percent of asv -level <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus") +level <- subset(asv_tax, select = -c(ASV_ID,confidence,sequence)) +level <- colnames(level) -# Catch 100% Kingdom assignment -if (count(asv_tax, Kingdom)$n[1] == nrow(asv_tax)){ - n_kingdom = 0 +# Catch 100% highest taxa (e.g. 
Kingdom) assignment +if (count(asv_tax, level[1])$n[1] == nrow(asv_tax)){ + n_1 = 0 } else { - n_kingdom = count(asv_tax, Kingdom)$n[1] + n_1 = count(asv_tax, level[1])$n[1] } n_asv_tax = nrow(asv_tax) -n_asv_unclassified <- c(n_kingdom, - count(asv_tax, Phylum)$n[1], - count(asv_tax, Class)$n[1], - count(asv_tax, Order)$n[1], - count(asv_tax, Family)$n[1], - count(asv_tax, Genus)$n[1]) +n_asv_unclassified <- c(n_1) +for (x in level[2:length(level)]) { + asv_tax_subset <- subset(asv_tax, select = x) + colnames(asv_tax_subset)[1] <- "count_this" + n_asv_unclassified <- c(n_asv_unclassified, count(asv_tax_subset, count_this)$n[1]) +} + n_asv_classified <- n_asv_tax - n_asv_unclassified p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2) @@ -376,15 +377,12 @@ asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified) # Build output string outputstr <- "DADA2 classified " - for (row in seq_len(nrow(asv_classi_df))) { outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified, - " % ASVs at ", asv_classi_df[row, ]$level, " level") - switch(as.character(row), - "5" = outputstr <- paste0(outputstr, " and "), - "6" = outputstr <- paste0(outputstr, ".\n\n"), - outputstr <- paste0(outputstr, ", ")) + " % ASVs at ", asv_classi_df[row, ]$level, " level, ") } +outputstr <- substr(outputstr, 1, nchar(outputstr)-2) +outputstr <- paste0(outputstr, ".\n\n") # Output Text Classifications cat(outputstr) From f2242053a29f0d994205453220be1370580ca88e Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 20 Jun 2023 17:25:20 +0200 Subject: [PATCH 030/230] add QIIME2 taxonomic assignments --- assets/report_template.Rmd | 61 +++++++++++++++++++++++++++++++++ bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 2 +- 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index b70eb5a3..e63e2134 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -23,6 +23,7 @@ 
params: flag_ref_tax_user: FALSE flag_single_end: FALSE flag_dada2_taxonomy: FALSE + flag_qiime2_taxonomy: FALSE trunclenf: "" trunclenr: "" max_ee: "" @@ -398,3 +399,63 @@ ggplot(asv_classi_df, coord_flip() + theme_bw() ``` + +```{r, eval = params$flag_qiime2_taxonomy, results='asis'} +# Header +cat("## Taxonomic Classification using QIIME2\n") + +#TODO: add database information +#TODO: only tested for greengenes85, need to test also UNITE and SILVA! + +# Read file and prepare table +asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") +#asv_tax <- data.frame(do.call('rbind', strsplit(as.character(asv_tax$Taxon),'; ',fixed=TRUE))) +asv_tax <- subset(asv_tax, select = Taxon) + +# Remove greengenes85 ".__" placeholders +df = as.data.frame(lapply(asv_tax, function(x) gsub(".__", "", x))) +# remove all last, empty ; +df = as.data.frame(lapply(df, function(x) gsub(" ;","",x))) +# remove last remaining, empty ; +df = as.data.frame(lapply(df, function(x) gsub("; $","",x))) + +# get maximum amount of taxa levels per ASV +max_taxa <- lengths(regmatches(df$Taxon, gregexpr("; ", df$Taxon)))+1 + +# Currently, all QIIME2 databases seem to have the same levels! 
+level <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species") + +# Calculate the classified numbers/percent of asv +n_asv_tax = nrow(asv_tax) + +n_asv_classified <- length(which(max_taxa>=1)) +for (x in 2:length(level)) { + n_asv_classified <- c(n_asv_classified, length(which(max_taxa>=x)) ) +} +p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2) + +asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified) + +# Build output string +outputstr <- "QIIME2 classified " +for (row in seq_len(nrow(asv_classi_df))) { + outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified, + " % ASVs at ", asv_classi_df[row, ]$level, " level, ") +} +outputstr <- substr(outputstr, 1, nchar(outputstr)-2) +outputstr <- paste0(outputstr, ".\n\n") + +# Output Text Classifications +cat(outputstr) + +# Barplot +# Plot +asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level) +ggplot(asv_classi_df, + aes(x = reorder(level, desc(level)), y = p_asv_classified)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("% Classification") + + xlab("Levels") + + coord_flip() + + theme_bw() +``` diff --git a/bin/generate_report.R b/bin/generate_report.R index 1385a436..b6bcfc9b 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -43,6 +43,7 @@ option_list = list( make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) @@ -87,4 +88,5 @@ rmarkdown::render(opt$report, 
output_file = opt$output, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, sintax_taxonomy = opt$sintax_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, + flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, qiime2_taxonomy = opt$qiime2_taxonomy)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 1cdad23f..a40dffa2 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -55,7 +55,7 @@ process SUMMARY_REPORT { params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" def sintax_taxonomy = sintax_tax ? "--sintax_taxonomy $sintax_tax" : "" def pplace_taxonomy = pplace_tax ? "--pplace_taxonomy $pplace_tax" : "" - def qiime2_taxonomy = qiime2_tax ? "--qiime2_taxonomy $qiime2_tax" : "" + def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax" : "" """ generate_report.R --report $report_template \\ --output "Summary_Report.html" \\ From b656c662328cf7def42dea39609efdf942c6b392 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 20 Jun 2023 17:38:55 +0200 Subject: [PATCH 031/230] add SINTAX taxonomic assignments --- assets/report_template.Rmd | 56 +++++++++++++++++++++++++++++++++ bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 2 +- 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index e63e2134..41456a8f 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -24,6 +24,7 @@ params: flag_single_end: FALSE flag_dada2_taxonomy: FALSE flag_qiime2_taxonomy: FALSE + flag_sintax_taxonomy: FASLE trunclenf: "" trunclenr: "" max_ee: "" @@ -459,3 +460,58 @@ ggplot(asv_classi_df, coord_flip() + theme_bw() ``` + +```{r, eval = params$flag_sintax_taxonomy, results='asis'} +# Header +cat("## Taxonomic Classification using SINTAX\n") + +#TODO: add database information + 
+asv_tax <- read.table(params$sintax_taxonomy, header = TRUE, sep = "\t") + +# Calculate the classified numbers/percent of asv +level <- subset(asv_tax, select = -c(ASV_ID,confidence,sequence)) +level <- colnames(level) + +# Catch 100% highest taxa (e.g. Kingdom) assignment +if (count(asv_tax, level[1])$n[1] == nrow(asv_tax)){ + n_1 = nrow(asv_tax) +} else { + n_1 = count(asv_tax, level[1])$n[1] +} +n_asv_tax = nrow(asv_tax) +n_asv_unclassified <- c(n_1) +for (x in level[2:length(level)]) { + asv_tax_subset <- subset(asv_tax, select = x) + colnames(asv_tax_subset)[1] <- "count_this" + n_asv_unclassified <- c(n_asv_unclassified, count(asv_tax_subset, count_this)$n[1]) +} + +n_asv_classified <- n_asv_tax - n_asv_unclassified +p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2) + +asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified) + +# Build output string +outputstr <- "SINTAX classified " +for (row in seq_len(nrow(asv_classi_df))) { + outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified, + " % ASVs at ", asv_classi_df[row, ]$level, " level, ") +} +outputstr <- substr(outputstr, 1, nchar(outputstr)-2) +outputstr <- paste0(outputstr, ".\n\n") + +# Output Text Classifications +cat(outputstr) + +# Barplot +# Plot +asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level) +ggplot(asv_classi_df, + aes(x = reorder(level, desc(level)), y = p_asv_classified)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("% Classification") + + xlab("Levels") + + coord_flip() + + theme_bw() +``` diff --git a/bin/generate_report.R b/bin/generate_report.R index b6bcfc9b..cc02760d 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -42,6 +42,7 @@ option_list = list( make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", 
metavar="character"), make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--flag_sintax_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character") @@ -86,6 +87,7 @@ rmarkdown::render(opt$report, output_file = opt$output, ref_tax_path = opt$ref_tax_path, dada2_taxonomy = opt$dada2_taxonomy, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, + flag_sintax_taxonomy = opt$flag_sintax_taxonomy, sintax_taxonomy = opt$sintax_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index a40dffa2..67fbd0ef 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -53,7 +53,7 @@ process SUMMARY_REPORT { def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" def dada2_taxonomy = !dada2_tax ? "" : params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" - def sintax_taxonomy = sintax_tax ? "--sintax_taxonomy $sintax_tax" : "" + def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax" : "" def pplace_taxonomy = pplace_tax ? "--pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? 
"--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax" : "" """ From 79c90a9893b124fb346e8f5c8418f42f234e4c27 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 20 Jun 2023 17:56:28 +0200 Subject: [PATCH 032/230] add Phylogenetic placement --- assets/report_template.Rmd | 56 +++++++++++++++++++++++++++++++-- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 2 +- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 41456a8f..839ffaff 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -24,7 +24,8 @@ params: flag_single_end: FALSE flag_dada2_taxonomy: FALSE flag_qiime2_taxonomy: FALSE - flag_sintax_taxonomy: FASLE + flag_sintax_taxonomy: FALSE + flag_pplace_taxonomy: FALSE trunclenf: "" trunclenr: "" max_ee: "" @@ -402,7 +403,7 @@ ggplot(asv_classi_df, ``` ```{r, eval = params$flag_qiime2_taxonomy, results='asis'} -# Header +# Header #TODO: that header line needs to be also displayed with other taxonomic assignments! cat("## Taxonomic Classification using QIIME2\n") #TODO: add database information @@ -515,3 +516,54 @@ ggplot(asv_classi_df, coord_flip() + theme_bw() ``` + +```{r, eval = params$flag_pplace_taxonomy, results='asis'} +# Header +cat("## Taxonomic Classification using Phylogenetic Placement\n") + +#TODO: add database information +#TODO: only tested for greengenes85, need to test also UNITE and SILVA! 
+ +# Read file and prepare table +asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") + +# get maximum amount of taxa levels per ASV +max_taxa <- lengths(regmatches(asv_tax$taxonomy, gregexpr(";", asv_tax$taxonomy)))+1 + +# labels for levels +level <- rep(1:max(max_taxa)) + +# Calculate the classified numbers/percent of asv +n_asv_tax = nrow(asv_tax) + +n_asv_classified <- length(which(max_taxa>=1)) +for (x in 2:length(level)) { + n_asv_classified <- c(n_asv_classified, length(which(max_taxa>=x)) ) +} +p_asv_classified <- round(n_asv_classified / n_asv_tax * 100, 2) + +asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified) + +# Build output string +outputstr <- "Phylogenetic Placement classified " +for (row in seq_len(nrow(asv_classi_df))) { + outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified, + " % ASVs at ", asv_classi_df[row, ]$level, " level, ") +} +outputstr <- substr(outputstr, 1, nchar(outputstr)-2) +outputstr <- paste0(outputstr, ".\n\n") + +# Output Text Classifications +cat(outputstr) + +# Barplot +# Plot +asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level) +ggplot(asv_classi_df, + aes(x = reorder(level, desc(level)), y = p_asv_classified)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("% Classification") + + xlab("Levels") + + coord_flip() + + theme_bw() +``` diff --git a/bin/generate_report.R b/bin/generate_report.R index cc02760d..5c3894fb 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -44,6 +44,7 @@ option_list = list( make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_sintax_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--flag_pplace_taxonomy"), 
action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) @@ -89,6 +90,7 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, flag_sintax_taxonomy = opt$flag_sintax_taxonomy, sintax_taxonomy = opt$sintax_taxonomy, + flag_pplace_taxonomy = opt$flag_pplace_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, qiime2_taxonomy = opt$qiime2_taxonomy)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 67fbd0ef..3ef91cd0 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -54,7 +54,7 @@ process SUMMARY_REPORT { def dada2_taxonomy = !dada2_tax ? "" : params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax" : "" - def pplace_taxonomy = pplace_tax ? "--pplace_taxonomy $pplace_tax" : "" + def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? 
"--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax" : "" """ generate_report.R --report $report_template \\ From f5eabb0ec4d3486cdaeb207a56bb1578d0a58347 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 22 Jun 2023 13:00:27 +0200 Subject: [PATCH 033/230] add header when any taxonomic classification is available --- assets/report_template.Rmd | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 839ffaff..c8efd5b6 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -329,6 +329,16 @@ ggplot(df_sum, theme_bw() ``` +```{r, results='asis'} +# Check if any taxonomic classification is available +any_taxonomy <- params$flag_dada2_taxonomy || params$flag_qiime2_taxonomy || params$flag_sintax_taxonomy || params$flag_pplace_taxonomy +``` + +```{r, eval = any_taxonomy, results='asis'} +# Header if any taxonomic classification is available +cat("# Taxonomic Classification\n") +``` + ```{r, eval = params$flag_dada2_taxonomy, results='asis'} # Header cat("# Taxonomic Classification\n") @@ -403,7 +413,7 @@ ggplot(asv_classi_df, ``` ```{r, eval = params$flag_qiime2_taxonomy, results='asis'} -# Header #TODO: that header line needs to be also displayed with other taxonomic assignments! +# Header cat("## Taxonomic Classification using QIIME2\n") #TODO: add database information @@ -522,7 +532,6 @@ ggplot(asv_classi_df, cat("## Taxonomic Classification using Phylogenetic Placement\n") #TODO: add database information -#TODO: only tested for greengenes85, need to test also UNITE and SILVA! 
# Read file and prepare table asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") From 3853405ebb5fab543c08eec0c7b0ac89367a5512 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 22 Jun 2023 15:23:36 +0200 Subject: [PATCH 034/230] add explanations to elements before taxonomic classification --- assets/report_template.Rmd | 138 ++++++++++++++++++++++++++----------- 1 file changed, 99 insertions(+), 39 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index c8efd5b6..7961695e 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -75,10 +75,13 @@ knitr::opts_chunk$set(echo = FALSE) mqc_rep_path <- paste0("../multiqc/multiqc_report.html") cat("## FastQC\n") +cat("FastQC gives general quality metrics about your sequenced reads. ", + "It provides information about the quality score distribution across your reads, ", + "per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences.\n") cat("The sequence quality was checked using FastQC and resulting data was ", - "aggregated using the FastQC module of MultiQC. For more quality ", - "controls and per sample quality checks you can check the full ", - "MultiQC report, which is found [here](", mqc_rep_path, ").", sep = "") + "aggregated using the FastQC module of MultiQC. For more quality ", + "controls and per sample quality checks you can check the full ", + "MultiQC report, which is found [here](", mqc_rep_path, ").", sep = "") ``` ```{r, eval = !params$flag_skip_fastqc, out.width='100%', dpi=1200, fig.align='center'} @@ -86,6 +89,14 @@ knitr::include_graphics(params$mqc_plot) ``` ```{r, eval = !params$flag_skip_cutadapt, results='asis'} +cat("## Primer removal with Cutadapt\n") +cat("Cutadapt is trimming primer sequences from sequencing reads. ", + "Primer sequences are non-biological sequences that often introduce ", + "point mutations that do not reflect sample sequences. 
This is especially ", + "true for degenerated PCR primer. If primer trimming would be omitted, artifactual ", + "amplicon sequence variants might be computed by the denoising tool or ", + "sequences might be lost due to become labelled as PCR chimera.\n\n") + # import tsv cutadapt_summary <- read.table(file = params$ca_sum_path, header = TRUE, sep = "\t") @@ -93,13 +104,13 @@ passed_col <- as.numeric(substr( cutadapt_summary$cutadapt_passing_filters_percent, 1, 4)) max_disc <- 100 - min(passed_col) -avg_passed <- mean(passed_col) +avg_passed <- round(mean(passed_col),1) -cutadapt_text_unch <- paste0("## Cutadapt\n", - "Remaining primers were trimmed using cutadapt") -cutadapt_text_ch <- paste0("and all untrimmed sequences were discarded. <", - max_disc, "% of the sequences were discarded per sample and a mean of ", - avg_passed, "% of the sequences per sample passed the filtering.") +cutadapt_text_unch <- "Primers were trimmed using cutadapt" +cutadapt_text_ch <- paste0(" and all untrimmed sequences were discarded. ", + "Sequences that did not contain primer sequences were considered artifacts. Less than ", + max_disc, "% of the sequences were discarded per sample and a mean of ", + avg_passed, "% of the sequences per sample passed the filtering.") if (!params$flag_retain_untrimmed) cutadapt_text <- paste0( cutadapt_text_unch, cutadapt_text_ch @@ -117,14 +128,18 @@ cutadapt_summary$passed_num <- passed_col ggplot(cutadapt_summary, aes(x = sample, y = passed_col)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + - ylab("% passing filters of cutadapt") + + ylab("% sequencing reads passing filters of cutadapt") + xlab("Samples") + coord_flip() + theme_bw() ``` +## Quality filtering using DADA2 + ```{r, results='asis'} -cat("## QC using DADA2\n") +cat("Additional quality filtering can improve sequence recovery. ", + "Often it is advised trimming the last few nucleotides to avoid less well-controlled errors that can arise there. 
") + if (params$trunc_qmin != -1) { f_and_tr_args <- readLines(params$dada_filtntrim_args) trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", @@ -135,22 +150,24 @@ if (params$trunc_qmin != -1) { "below ", params$trunc_qmin, " and at least ",params$trunc_rmin*100, "% of reads are retained, resulting in a trim of ", "forward reads at ", tr_len_f, " bp and reverse ", - "reads at ", tr_len_r, " bp. ", sep = "") + "reads at ", tr_len_r, " bp, reads shorter than this are discarded. ", sep = "") } else if (params$trunclenf == "null" && params$trunclenr == "null") { cat("Reads were not trimmed.") } else if (params$trunclenf != 0 && params$trunclenr != 0) { cat("Forward reads were trimmed at ", params$trunclenf, " bp and reverse reads were trimmed at ", params$trunclenr, - " bp. ", sep = "") + " bp, reads shorter than this are discarded. ", sep = "") } else if (params$trunclenf != 0) { - cat("Forward reads were trimmed at ", params$trunclenf," bp. ", sep = "") + cat("Forward reads were trimmed at ", params$trunclenf," bp, reads shorter than this are discarded. ", sep = "") } else if (params$trunclenr != 0) { - cat("Reverse reads were trimmed at ", params$trunclenr," bp. ", sep = "") + cat("Reverse reads were trimmed at ", params$trunclenr," bp, reads shorter than this are discarded. ", sep = "") } cat("Reads with more than", params$max_ee,"expected errors were discarded.", sep = " ") ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} +cat ("**Quality profiles:**\n\n") + if (params$flag_single_end) { cat("Read quality stats for incoming data:") } else { @@ -175,32 +192,48 @@ knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} -cat("Original plots can be found [here](../dada2/QC/).") +cat("Overall read quality profiles as heat map of the frequency of each quality score at each base position. 
", + "The mean quality score at each position is shown by the green line, and the quartiles of the quality score ", + "distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least ", + "that position. Original plots can be found [folder dada2/QC/](../dada2/QC/) with names that end in '_qual_stats.pdf'.") ``` -## Error correction using DADA2 +# ASV inference using DADA2 + +DADA2 performs fast and accurate sample inference from amplicon data with single-nucleotide resolution. +It infers exact amplicon sequence variants (ASVs) from amplicon data with fewer false positives than +many other methods while maintaining high sensitivity. -Error correction was performed using DADA2 as well and the originalplots can be found [here](../dada2/QC/). +DADA2 reduces sequence errors and dereplicates sequences by quality filtering, denoising, +read pair merging (for paired end Illumina reads only) and PCR chimera removal. + +## Error correction + +Read error correction was performed using estimated error rates, visualized below. ```{r, out.width="49%", fig.show='hold', fig.align='default'} knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) ``` -```{r, results='asis'} -# Header -cat("## ASV inference using DADA2\n") +Estimated error rates for each possible transition. The black line shows the estimated error rates after +convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal +definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates +(points), and the error rates should drop with increased quality. Original plots can be found in +[folder dada2/QC/](../dada2/QC/) with names that end in '.err.pdf'. 
-#import asv table -asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t") -n_asv <- length(asv_table$ASV_ID) +## Read counts per sample -# Output text -cat(n_asv, - "amplicon sequence variants (ASVs) were obtained across all samples. ") -cat("The ASVs can be found in ['ASV_seqs.fasta'](../dada2/). And the corresponding", - " quantification of the ASVs across samples can be found in", - "['ASV_table.tsv'](../dada2/). An extensive table containing both can ", - "be found ['DADA2_table.tsv'](../dada2/)") +Tracking read numbers through DADA2 processing steps, for each sample. In the following table are read numbers after each processing stage. + +```{r, results='asis'} +if ( params$flag_single_end ) { + cat("Processing stages are: input - reads into DADA2, filtered - reads passed quality filtering, ", + "denoised - reads after denoising, nonchim - reads in non-chimeric sequences (final ASVs)") +} else { + cat("Processing stages are: input - read pairs into DADA2, filtered - read pairs passed quality filtering, ", + "denoisedF - forward reads after denoising, denoisedR - reverse reads after denoising, ", + "merged - successfully merged read pairs, nonchim - read pairs in non-chimeric sequences (final ASVs)") +} # import stats tsv dada_stats <- read.table(file = params$dada_stats_path, header = TRUE, sep = "\t") @@ -210,7 +243,14 @@ datatable(dada_stats, options = list( scrollX = TRUE, scrollY = "300px", paging = FALSE)) +``` +Samples with unusual low reads numbers relative to the number of expected ASVs (e.g. 500 reads with 100 ASVs) +should be treated cautiously, because the abundance estimate will be very granular and might vary strongly between (theoretical) +replicates due to high impact of stochasticity. 
+ +Stacked barcharts of read numbers per sample and processing stage (see above): +```{r, results='asis'} # Stacked barchart to num of reads # Calc exluded asvs and transform all cols to percent @@ -228,7 +268,7 @@ if ( params$flag_single_end ) { # Stack columns for both stacked barcharts samples_t <- c(rep(dada_stats_p$sample, 4)) steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoised", n_samples), - rep("excluded by nonchim", n_samples), rep("ready for analysis", n_samples)) + rep("excluded by nonchim", n_samples), rep("reads in final ASVs", n_samples)) # stack the column for absolute number of asvs asvs_abs_t <- as.array(flatten_dbl(dada_stats_ex[3:6])) dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) @@ -251,8 +291,8 @@ if ( params$flag_single_end ) { samples_t <- c(rep(dada_stats_p$sample, 6)) steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoisedF", n_samples), rep("excluded by denoisedR", n_samples), rep("excluded by merged", n_samples), - rep("excluded by nonchim", n_samples), rep("ready for analysis", n_samples)) - # stack the column for absolute number of asvs + rep("excluded by nonchim", n_samples), rep("reads in final ASVs", n_samples)) + # stack the column for absolute reads asvs_abs_t <- as.array(flatten_dbl(dada_stats_ex[3:8])) dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) # stack the column for percentage of asvs @@ -265,7 +305,7 @@ dada_stats_ex_t$steps_t <- factor(dada_stats_ex_t$steps_t, levels=unique(dada_st ggplot(dada_stats_ex_t, aes(fill = steps_t, y = asvs_abs_t, x = samples_t)) + geom_bar(position = "stack", stat = "identity") + xlab("Samples") + - ylab("Absolute number ASVs") + + ylab("Absolute reads") + coord_flip() + scale_fill_brewer("Filtering Steps", palette = "Spectral") @@ -274,11 +314,31 @@ dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stat ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + 
geom_bar(position = "fill", stat = "identity") + xlab("Samples") + - ylab("% of total ASVs") + + ylab("% of total reads") + coord_flip() + scale_fill_brewer("Filtering Steps", palette = "Spectral") ``` +The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. +Samples that are very different in lost reads (per stage) to the majority of samples must be compared with caution, because an unusual problem +(e.g. during nucleotide extraction, library preparation, or sequencing) could have occurred that might add bias to the analysis. + +## Inferred ASVs + +```{r, results='asis'} +#import asv table +asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t") +n_asv <- length(asv_table$ASV_ID) + +# Output text +cat("Finally,", n_asv, + "amplicon sequence variants (ASVs) were obtained across all samples. ") +cat("The ASVs can be found in ['dada2/ASV_seqs.fasta'](../dada2/ASV_seqs.fasta). And the corresponding", + " quantification of the ASVs across samples can be found in", + "['dada2/ASV_table.tsv'](../dada2/ASV_table.tsv). An extensive table containing both can ", + "be found ['dada2/DADA2_table.tsv'](../dada2/DADA2_table.tsv)") +``` + ```{r, eval = !params$flag_skip_barrnap, results='asis'} # Header cat("# Filtering of ASVs\n") @@ -316,7 +376,7 @@ cat( "Barrnap classified ") cat( df_sum$count[1], "(", df_sum$percent[1],"%) ASVs as most similar to Bacteria, " ) cat( df_sum$count[2], "(", df_sum$percent[2],"%) ASVs to Archea, " ) cat( df_sum$count[3], "(", df_sum$percent[3],"%) ASVs to Mitochondria, " ) -cat( df_sum$count[4], "(", df_sum$percent[4],"%) ASVs to Eukaryotes, and" ) +cat( df_sum$count[4], "(", df_sum$percent[4],"%) ASVs to Eukaryotes, and " ) cat( df_sum$count[5], "(", df_sum$percent[5],"%) were below similarity threshold to any kingdom." 
) # Barplot @@ -557,7 +617,7 @@ asv_classi_df <- data.frame(level, n_asv_classified, p_asv_classified) outputstr <- "Phylogenetic Placement classified " for (row in seq_len(nrow(asv_classi_df))) { outputstr <- paste0(outputstr, asv_classi_df[row, ]$p_asv_classified, - " % ASVs at ", asv_classi_df[row, ]$level, " level, ") + " % ASVs at taxonomic level ", asv_classi_df[row, ]$level, ", ") } outputstr <- substr(outputstr, 1, nchar(outputstr)-2) outputstr <- paste0(outputstr, ".\n\n") @@ -572,7 +632,7 @@ ggplot(asv_classi_df, aes(x = reorder(level, desc(level)), y = p_asv_classified)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + - xlab("Levels") + + xlab("Taxonomic levels") + coord_flip() + theme_bw() ``` From ecf83dfdeda59e771edcd1bd821827c7877ead7e Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 23 Jun 2023 13:15:39 +0200 Subject: [PATCH 035/230] clean up cutadapt output --- assets/report_template.Rmd | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 7961695e..4d87f914 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -118,20 +118,23 @@ if (!params$flag_retain_untrimmed) cutadapt_text <- paste0( cat(cutadapt_text) +# shorten header by "cutadapt_" to optimize visualisation +colnames(cutadapt_summary) <- gsub("cutadapt_","",colnames(cutadapt_summary)) + datatable(cutadapt_summary, options = list( scrollX = TRUE, scrollY = "300px", paging = FALSE)) -cutadapt_summary$passed_num <- passed_col - -ggplot(cutadapt_summary, - aes(x = sample, y = passed_col)) + - geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + - ylab("% sequencing reads passing filters of cutadapt") + - xlab("Samples") + - coord_flip() + - theme_bw() +# Barplot TODO: currently skipper, because this is already in the table +#cutadapt_summary$passed_num <- passed_col +#ggplot(cutadapt_summary, +# aes(x = 
sample, y = passed_col)) + +# geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + +# ylab("% sequencing reads passing filters of cutadapt") + +# xlab("Samples") + +# coord_flip() + +# theme_bw() ``` ## Quality filtering using DADA2 From a21d54a5e60b5cf572f5b919162529cd0e08a699 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 23 Jun 2023 16:50:40 +0200 Subject: [PATCH 036/230] add length filter --- assets/report_template.Rmd | 116 +++++++++++++++++++++++++++----- bin/generate_report.R | 10 +++ modules/local/summary_report.nf | 19 ++++-- workflows/ampliseq.nf | 3 + 4 files changed, 127 insertions(+), 21 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 4d87f914..2085e9bd 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -31,6 +31,8 @@ params: max_ee: "" trunc_qmin: "" trunc_rmin: "" + min_len_asv: "" + max_len_asv: "" # file paths mqc_plot: "" @@ -51,8 +53,11 @@ params: path_rrna_euk: "" path_rrna_mito: "" path_barrnap_sum: "" + filter_len_asv: "" + filter_len_asv_len_orig: "" + filter_codons: "" ref_tax_path: "" - dada2_taxonomy: "empty" + dada2_taxonomy: "" sintax_taxonomy: "" pplace_taxonomy: "" qiime2_taxonomy: "" @@ -304,13 +309,13 @@ if ( params$flag_single_end ) { } # Plot -dada_stats_ex_t$steps_t <- factor(dada_stats_ex_t$steps_t, levels=unique(dada_stats_ex_t$steps_t)) -ggplot(dada_stats_ex_t, aes(fill = steps_t, y = asvs_abs_t, x = samples_t)) + - geom_bar(position = "stack", stat = "identity") + - xlab("Samples") + - ylab("Absolute reads") + - coord_flip() + - scale_fill_brewer("Filtering Steps", palette = "Spectral") +#dada_stats_ex_t$steps_t <- factor(dada_stats_ex_t$steps_t, levels=unique(dada_stats_ex_t$steps_t)) +#ggplot(dada_stats_ex_t, aes(fill = steps_t, y = asvs_abs_t, x = samples_t)) + +# geom_bar(position = "stack", stat = "identity") + +# xlab("Samples") + +# ylab("Absolute reads") + +# coord_flip() + +# scale_fill_brewer("Filtering Steps", palette = "Spectral") 
# Plot dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stats_p_t$steps_t)) @@ -336,16 +341,22 @@ n_asv <- length(asv_table$ASV_ID) # Output text cat("Finally,", n_asv, "amplicon sequence variants (ASVs) were obtained across all samples. ") -cat("The ASVs can be found in ['dada2/ASV_seqs.fasta'](../dada2/ASV_seqs.fasta). And the corresponding", - " quantification of the ASVs across samples can be found in", - "['dada2/ASV_table.tsv'](../dada2/ASV_table.tsv). An extensive table containing both can ", - "be found ['dada2/DADA2_table.tsv'](../dada2/DADA2_table.tsv)") +cat("The ASVs can be found in ['dada2/ASV_seqs.fasta'](../dada2/). And the corresponding", + " quantification of the ASVs across samples is in", + "['dada2/ASV_table.tsv'](../dada2/). An extensive table containing both was ", + "saved as ['dada2/DADA2_table.tsv'](../dada2/)") ``` -```{r, eval = !params$flag_skip_barrnap, results='asis'} -# Header +```{r, results='asis'} +flag_any_filtering <- !params$flag_skip_barrnap || isTRUE(params$filter_len_asv != "") || isTRUE(params$filter_codons != "") +``` + +```{r, eval = flag_any_filtering, results='asis'} cat("# Filtering of ASVs\n") -cat("## ASV filtering using Barrnap\n") +``` + +```{r, eval = !params$flag_skip_barrnap, results='asis'} +cat("## rRNA detection\n") cat("Barrnap classifies the ASVs into the origin domain (including ", "mitochondiral origin). 
Using this classification the ASVs can ", "be filtered by sample origin.\n\n", sep = "") @@ -392,6 +403,79 @@ ggplot(df_sum, theme_bw() ``` +```{r, results='asis'} +flag_filter_len_asv <- isTRUE(params$filter_len_asv != "") +``` + +```{r, eval = flag_filter_len_asv, results='asis'} +cat("## Sequence length\n") + +cat("A length filter was used to reduce potential contamination after ASV computation.", + "Before filtering, ASVs had the following length profile:\n\n") + +# ASV length profile + +# import length profile tsv +filter_len_profile <- read.table(file = params$filter_len_asv_len_orig, header = TRUE, sep = "\t") + +# find number of ASVs filtered (thresholds arrive as character strings from generate_report.R, so convert to numeric before comparing with Length; otherwise the comparison is lexicographic) +filter_len_asv_filtered <- filter_len_profile +if ( params$min_len_asv != 0 ) { + filter_len_asv_filtered <- subset(filter_len_asv_filtered, Length >= as.numeric(params$min_len_asv)) +} +if ( params$max_len_asv != 0 ) { + filter_len_asv_filtered <- subset(filter_len_asv_filtered, Length <= as.numeric(params$max_len_asv)) +} + +ggplot(filter_len_profile, + aes(x = Length, y = Counts)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("Number of ASVs") + + xlab("Length") + + coord_flip() + + theme_bw() + +# Reads removed + +# import stats tsv +filter_len_stats <- read.table(file = params$filter_len_asv, header = TRUE, sep = "\t") +# re-name & re-order columns +colnames(filter_len_stats) <- gsub("lenfilter_","",colnames(filter_len_stats)) +filter_len_stats <- filter_len_stats[, c("sample", "input", "output")] +filter_len_stats$'retained%' <- round( filter_len_stats$output / filter_len_stats$input * 100 , 2) +filter_len_stats_avg_removed <- 100-sum(filter_len_stats$'retained%')/length(filter_len_stats$'retained%') +filter_len_stats_max_removed <- 100-min(filter_len_stats$'retained%') + +cat("\n\n") +if ( params$min_len_asv != 0 && params$max_len_asv != 0 ) { + cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"or above",params$max_len_asv,"bp. 
") +} else if ( params$min_len_asv != 0 ) { + cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"bp. ") +} else if ( params$max_len_asv != 0 ) { + cat("Filtering omitted all ASVs with length above",params$max_len_asv,"bp. ") +} +cat("The following table shows (read) counts for each sample before and after filtering:") + +# Display table +datatable(filter_len_stats, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) + +cat("In average", round(filter_len_stats_avg_removed,2), "% reads were removed, but at most",filter_len_stats_max_removed,"% reads per sample. ") +cat("The number of ASVs was reduced by",sum(filter_len_profile$Counts)-sum(filter_len_asv_filtered$Counts),"(",100-round( sum(filter_len_asv_filtered$Counts)/sum(filter_len_profile$Counts)*100 ,2),"%), from",sum(filter_len_profile$Counts),"to",sum(filter_len_asv_filtered$Counts)," ASVs.") +``` + +```{r, results='asis'} +flag_filter_codons <- isTRUE(params$filter_codons != "") +``` + +```{r, eval = flag_filter_codons, results='asis'} +cat("## Codon usage\n") +#TODO: fill with content! 
+cat("flag_filter_codons", flag_filter_codons, "params$filter_codons", params$filter_codons) +``` + ```{r, results='asis'} # Check if any taxonomic classification is available any_taxonomy <- params$flag_dada2_taxonomy || params$flag_qiime2_taxonomy || params$flag_sintax_taxonomy || params$flag_pplace_taxonomy @@ -403,8 +487,6 @@ cat("# Taxonomic Classification\n") ``` ```{r, eval = params$flag_dada2_taxonomy, results='asis'} -# Header -cat("# Taxonomic Classification\n") cat("## Taxonomic Classification using DADA2\n") if (!params$flag_ref_tax_user) { diff --git a/bin/generate_report.R b/bin/generate_report.R index 5c3894fb..070463f4 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -38,6 +38,11 @@ option_list = list( make_option(c("--path_rrna_euk"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_rrna_mito"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_barrnap_sum"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--filter_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--min_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--max_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--filter_codons"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--filter_len_asv_len_orig"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--ref_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), @@ -85,6 
+90,11 @@ rmarkdown::render(opt$report, output_file = opt$output, path_rrna_euk = opt$path_rrna_euk, path_rrna_mito = opt$path_rrna_mito, path_barrnap_sum = opt$path_barrnap_sum, + filter_len_asv = opt$filter_len_asv, + min_len_asv = opt$min_len_asv, + max_len_asv = opt$max_len_asv, + filter_len_asv_len_orig = opt$filter_len_asv_len_orig, + filter_codons = opt$filter_codons, ref_tax_path = opt$ref_tax_path, dada2_taxonomy = opt$dada2_taxonomy, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 3ef91cd0..e887f832 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -3,10 +3,12 @@ process SUMMARY_REPORT { label 'process_low' container 'docker.io/tillenglert/ampliseq_report:latest' - //conda (params.enable_conda ? "bioconda:r-markdown==0.8--r3.4.1_1" : null) - //container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - // 'https://depot.galaxyproject.org/singularity/r-markdown:0.8--r3.4.1_1' : - // 'quay.io/biocontainers/r-markdown:0.8--r3.4.1_1' }" + /* this is from https://github.com/nf-core/modules/blob/master/modules/nf-core/rmarkdownnotebook/main.nf but doesnt work + conda "conda-forge::r-base=4.1.0 conda-forge::r-rmarkdown=2.9 conda-forge::r-yaml=2.2.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' : + 'biocontainers/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' }" + */ input: path(report_template) @@ -24,6 +26,9 @@ process SUMMARY_REPORT { path(dada_stats) path(barrnap_gff) path(barrnap_summary) + path(filter_len_asv_stats) + path(filter_len_asv_len_orig) + path(filter_codons_stats) path(dada2_tax_reference) path(dada2_tax) path(sintax_tax) @@ -51,6 +56,10 @@ process SUMMARY_REPORT { def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin --trunc_rmin $params.trunc_rmin" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" + def filter_len_asv = filter_len_asv_stats ? "--filter_len_asv $filter_len_asv_stats --filter_len_asv_len_orig $filter_len_asv_len_orig" : "" + filter_len_asv += params.min_len_asv ? " --min_len_asv $params.min_len_asv " : " --min_len_asv 0" + filter_len_asv += params.max_len_asv ? " --max_len_asv $params.max_len_asv" : " --max_len_asv 0" + def filter_codons = filter_codons_stats ? "--filter_codons $filter_codons_stats" : "" def dada2_taxonomy = !dada2_tax ? "" : params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" def sintax_taxonomy = sintax_tax ? 
"--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax" : "" @@ -74,6 +83,8 @@ process SUMMARY_REPORT { --trunclenf $params.trunclenf \\ --trunclenr $params.trunclenr \\ --max_ee $params.max_ee \\ + $filter_len_asv \\ + $filter_codons \\ $dada2_taxonomy \\ $sintax_taxonomy \\ $pplace_taxonomy \\ diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index dd74cfe7..1741c481 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -682,6 +682,9 @@ workflow AMPLISEQ { DADA2_MERGE.out.dada2stats, !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], + params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats : [], + params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig : [], + params.filter_codons ? FILTER_CODONS.out.stats : [], !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy && !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [], !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], !params.skip_taxonomy && params.sintax_ref_taxonomy ? 
ch_sintax_tax : [], From a4812ddfc41b4455588401b55c6371f3334d73b8 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 27 Jun 2023 09:21:11 +0200 Subject: [PATCH 037/230] bump-version-2.7.0dev --- CHANGELOG.md | 12 ++++++++++++ nextflow.config | 2 +- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/fasta.nf.test.snap | 2 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/novaseq.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test.snap | 2 +- 13 files changed, 24 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90e3e6a8..68487790 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## nf-core/ampliseq version 2.7.0dev + +### `Added` + +### `Changed` + +### `Fixed` + +### `Dependencies` + +### `Removed` + ## nf-core/ampliseq version 2.6.0 - 2023-06-27 ### `Added` diff --git a/nextflow.config b/nextflow.config index 9337c0de..f3f0ca67 100644 --- a/nextflow.config +++ b/nextflow.config @@ -300,7 +300,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '2.6.0' + version = '2.7.0dev' doi = '10.3389/fmicb.2020.550420' } diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index b0f87eb2..64ddaa21 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:08:54+0000" }, diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index e07eaf3d..c1c9cd9f 100644 --- a/tests/pipeline/fasta.nf.test.snap +++ b/tests/pipeline/fasta.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, 
yaml=6.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 17a54c28..c9c8f4bb 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index a54fb230..2f0095ac 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, 
Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 6a9514cf..737b16a2 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.6.0}}" + "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T00:10:02+0000" }, diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index 46d609ec..3c860a89 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, 
DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.6.0}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index e284e31d..d0aa5f26 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, 
TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 9c21f20e..6407a3bf 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, 
RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 1a57197d..49d65106 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index a2ff96db..c9745541 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.6.0}}" + 
"{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index dd6ec5fa..fdf84093 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.6.0}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, From 
f6bb28aa802aea19f32af203e118899c5aec5400 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 27 Jun 2023 11:04:07 +0200 Subject: [PATCH 038/230] add minimum codon filtering section --- assets/report_template.Rmd | 22 ++++++++++++++++++++-- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 2 +- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 2085e9bd..f4506856 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -56,6 +56,7 @@ params: filter_len_asv: "" filter_len_asv_len_orig: "" filter_codons: "" + stop_codons: "" ref_tax_path: "" dada2_taxonomy: "" sintax_taxonomy: "" @@ -401,6 +402,8 @@ ggplot(df_sum, xlab("rRNA origins") + coord_flip() + theme_bw() + +#TODO: add reads/ASVs removed as below?! ``` ```{r, results='asis'} @@ -472,8 +475,23 @@ flag_filter_codons <- isTRUE(params$filter_codons != "") ```{r, eval = flag_filter_codons, results='asis'} cat("## Codon usage\n") -#TODO: fill with content! 
-cat("flag_filter_codons", flag_filter_codons, "params$filter_codons", params$filter_codons) + +cat("Amplicons of coding regions are expected to be free of stop codons and consist of condon tripletts.", + "ASVs were filtered against the presence of stop codons (",params$stop_codons,") in the specified open reading frame of the ASV.", + "Additionally, ASVs that are not multiple of 3 in length were omitted.\n\n") + +# import stats tsv +filter_codons_stats <- read.table(file = params$filter_codons, header = TRUE, sep = "\t") + +cat("The following table shows read counts for each sample after filtering:") + +# Display table +datatable(filter_codons_stats, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) + +#TODO: add ASV count after filtering ``` ```{r, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index 070463f4..48e1b1ea 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -42,6 +42,7 @@ option_list = list( make_option(c("--min_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--max_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--filter_codons"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--stop_codons"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--filter_len_asv_len_orig"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--ref_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), @@ -95,6 +96,7 @@ rmarkdown::render(opt$report, output_file = opt$output, max_len_asv = opt$max_len_asv, filter_len_asv_len_orig = opt$filter_len_asv_len_orig, filter_codons = opt$filter_codons, + stop_codons = opt$stop_codons, 
ref_tax_path = opt$ref_tax_path, dada2_taxonomy = opt$dada2_taxonomy, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index e887f832..8e672c2d 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -59,7 +59,7 @@ process SUMMARY_REPORT { def filter_len_asv = filter_len_asv_stats ? "--filter_len_asv $filter_len_asv_stats --filter_len_asv_len_orig $filter_len_asv_len_orig" : "" filter_len_asv += params.min_len_asv ? " --min_len_asv $params.min_len_asv " : " --min_len_asv 0" filter_len_asv += params.max_len_asv ? " --max_len_asv $params.max_len_asv" : " --max_len_asv 0" - def filter_codons = filter_codons_stats ? "--filter_codons $filter_codons_stats" : "" + def filter_codons = filter_codons_stats ? "--filter_codons $filter_codons_stats --stop_codons $params.stop_codons" : "" def dada2_taxonomy = !dada2_tax ? "" : params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" def sintax_taxonomy = sintax_tax ? 
"--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax" : "" From de74d9886018e3ab23af53392e95e5ede1f72db4 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 27 Jun 2023 13:12:58 +0200 Subject: [PATCH 039/230] improve barrnap reporting --- assets/report_template.Rmd | 77 ++++++++++++++++++++++----------- bin/generate_report.R | 14 +++--- modules/local/summary_report.nf | 6 ++- workflows/ampliseq.nf | 3 +- 4 files changed, 64 insertions(+), 36 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index f4506856..2678089e 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -31,6 +31,7 @@ params: max_ee: "" trunc_qmin: "" trunc_rmin: "" + filter_ssu: "" min_len_asv: "" max_len_asv: "" @@ -48,11 +49,9 @@ params: path_asv_fa: "" path_dada2_tab: "" dada_stats_path: "" - path_rrna_arc: "" - path_rrna_bac: "" - path_rrna_euk: "" - path_rrna_mito: "" path_barrnap_sum: "" + filter_ssu_stats: "" + filter_ssu_asv: "" filter_len_asv: "" filter_len_asv_len_orig: "" filter_codons: "" @@ -358,52 +357,80 @@ cat("# Filtering of ASVs\n") ```{r, eval = !params$flag_skip_barrnap, results='asis'} cat("## rRNA detection\n") -cat("Barrnap classifies the ASVs into the origin domain (including ", - "mitochondiral origin). 
Using this classification the ASVs can ", - "be filtered by sample origin.\n\n", sep = "") +cat("Barrnap classifies the ASVs into the origin domain (including mitochondiral origin).\n\n", sep = "") # Read the barrnap files and count the lines -df = read.table( params$path_barrnap_sum, header = TRUE, sep = "\t", stringsAsFactors = FALSE) +barrnap_sum = read.table( params$path_barrnap_sum, header = TRUE, sep = "\t", stringsAsFactors = FALSE) # keep only ASV_ID & eval columns & sort -df <- subset(df, select = c(ASV_ID,mito_eval,euk_eval,arc_eval,bac_eval)) +barrnap_sum <- subset(barrnap_sum, select = c(ASV_ID,mito_eval,euk_eval,arc_eval,bac_eval)) # choose kingdom (column) with lowest evalue -df[is.na(df)] <- 1 -df$result = colnames(df[,2:5])[apply(df[,2:5],1,which.min)] -df$result = gsub("_eval", "", df$result) +barrnap_sum[is.na(barrnap_sum)] <- 1 +barrnap_sum$result = colnames(barrnap_sum[,2:5])[apply(barrnap_sum[,2:5],1,which.min)] +barrnap_sum$result = gsub("_eval", "", barrnap_sum$result) #import asv table asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t") n_asv <- length(asv_table$ASV_ID) # calculate numbers -n_classified <- length(df$result) -n_bac <- sum(grepl("bac", df$result)) -n_arc <- sum(grepl("arc", df$result)) -n_mito <- sum(grepl("mito", df$result)) -n_euk <- sum(grepl("euk", df$result)) +n_classified <- length(barrnap_sum$result) +n_bac <- sum(grepl("bac", barrnap_sum$result)) +n_arc <- sum(grepl("arc", barrnap_sum$result)) +n_mito <- sum(grepl("mito", barrnap_sum$result)) +n_euk <- sum(grepl("euk", barrnap_sum$result)) -df_sum <- data.frame(label=c('Bacteria','Archea','Mitochondria','Eukaryotes','Unclassified'), +barrnap_df_sum <- data.frame(label=c('Bacteria','Archea','Mitochondria','Eukaryotes','Unclassified'), count=c(n_bac,n_arc,n_mito,n_euk,n_asv - n_classified), percent=c(round( (n_bac/n_asv)*100, 2), round( (n_arc/n_asv)*100, 2), round( (n_mito/n_asv)*100, 2), round( (n_euk/n_asv)*100, 2), round( ( (n_asv - 
n_classified) /n_asv)*100, 2) ) ) # Build outputtext cat( "Barrnap classified ") -cat( df_sum$count[1], "(", df_sum$percent[1],"%) ASVs as most similar to Bacteria, " ) -cat( df_sum$count[2], "(", df_sum$percent[2],"%) ASVs to Archea, " ) -cat( df_sum$count[3], "(", df_sum$percent[3],"%) ASVs to Mitochondria, " ) -cat( df_sum$count[4], "(", df_sum$percent[4],"%) ASVs to Eukaryotes, and " ) -cat( df_sum$count[5], "(", df_sum$percent[5],"%) were below similarity threshold to any kingdom." ) +cat( barrnap_df_sum$count[1], "(", barrnap_df_sum$percent[1],"%) ASVs as most similar to Bacteria, " ) +cat( barrnap_df_sum$count[2], "(", barrnap_df_sum$percent[2],"%) ASVs to Archea, " ) +cat( barrnap_df_sum$count[3], "(", barrnap_df_sum$percent[3],"%) ASVs to Mitochondria, " ) +cat( barrnap_df_sum$count[4], "(", barrnap_df_sum$percent[4],"%) ASVs to Eukaryotes, and " ) +cat( barrnap_df_sum$count[5], "(", barrnap_df_sum$percent[5],"%) were below similarity threshold to any kingdom." ) # Barplot -ggplot(df_sum, +ggplot(barrnap_df_sum, aes(x = reorder(label, desc(label)), y = percent)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + xlab("rRNA origins") + coord_flip() + theme_bw() +``` + +```{r, results='asis'} +flag_filter_ssu <- !params$flag_skip_barrnap && isTRUE(params$filter_ssu != "none") +``` + +```{r, eval = flag_filter_ssu, results='asis'} +cat("ASVs were filtered for (",params$filter_ssu,") using the above classification.", + "The following table shows read counts for each sample before and after filtering:\n\n", sep = "") + +# Read the barrnap stats file +filter_ssu_stats = read.table( params$filter_ssu_stats, header = TRUE, sep = "\t", stringsAsFactors = FALSE) +# shorten header by "ssufilter_" to optimize visualisation +colnames(filter_ssu_stats) <- gsub("ssufilter_","",colnames(filter_ssu_stats)) +filter_ssu_stats <- subset(filter_ssu_stats, select = c(sample,input,output)) +filter_ssu_stats$'retained%' <- round( 
filter_ssu_stats$output / filter_ssu_stats$input *100, 2) +filter_ssu_stats_avg_removed <- 100-sum(filter_ssu_stats$'retained%')/length(filter_ssu_stats$'retained%') +filter_ssu_stats_max_removed <- 100-min(filter_ssu_stats$'retained%') + +# Display table +datatable(filter_ssu_stats, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) + +# Read the barrnap asv file +filter_ssu_asv <- read.table( params$filter_ssu_asv, header = TRUE, sep = "\t", stringsAsFactors = FALSE) +filter_ssu_asv_filtered <- nrow(filter_ssu_asv) -#TODO: add reads/ASVs removed as below?! +cat("In average", round(filter_ssu_stats_avg_removed,2), "% reads were removed, but at most",filter_ssu_stats_max_removed,"% reads per sample. ") +# "n_asv" is taken from the barrnap block above +cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-round( filter_ssu_asv_filtered/n_asv*100 ,2),"%), from",n_asv,"to",filter_ssu_asv_filtered," ASVs.") ``` ```{r, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index 48e1b1ea..69b7cdb6 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -33,10 +33,9 @@ option_list = list( make_option(c("--path_asv_fa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_dada2_tab"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada_stats_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--path_rrna_arc"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--path_rrna_bac"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--path_rrna_euk"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--path_rrna_mito"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--filter_ssu"), 
type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--filter_ssu_stats"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--filter_ssu_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_barrnap_sum"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--filter_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--min_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), @@ -86,10 +85,9 @@ rmarkdown::render(opt$report, output_file = opt$output, path_asv_fa = opt$path_asv_fa, path_dada2_tab = opt$path_dada2_tab, dada_stats_path = opt$dada_stats_path, - path_rrna_arc = opt$path_rrna_arc, - path_rrna_bac = opt$path_rrna_bac, - path_rrna_euk = opt$path_rrna_euk, - path_rrna_mito = opt$path_rrna_mito, + filter_ssu = opt$filter_ssu, + filter_ssu_stats = opt$filter_ssu_stats, + filter_ssu_asv = opt$filter_ssu_asv, path_barrnap_sum = opt$path_barrnap_sum, filter_len_asv = opt$filter_len_asv, min_len_asv = opt$min_len_asv, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 8e672c2d..e7ae9ae7 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -24,8 +24,9 @@ process SUMMARY_REPORT { path(dada_asv_fa) path(dada_tab) path(dada_stats) - path(barrnap_gff) path(barrnap_summary) + path(filter_ssu_stats) + path(filter_ssu_asv) path(filter_len_asv_stats) path(filter_len_asv_len_orig) path(filter_codons_stats) @@ -55,7 +56,8 @@ process SUMMARY_REPORT { "--dada_qc_f_path 'FW_qual_stats.svg' --dada_qc_r_path 'RV_qual_stats.svg' --dada_pp_qc_f_path 'FW_preprocessed_qual_stats.svg' --dada_pp_qc_r_path 'RV_preprocessed_qual_stats.svg'" def find_truncation = find_truncation_values ? 
"--trunc_qmin $params.trunc_qmin --trunc_rmin $params.trunc_rmin" : "" def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" - def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_rrna_arc ${barrnap_gff[0]} --path_rrna_bac ${barrnap_gff[1]} --path_rrna_euk ${barrnap_gff[2]} --path_rrna_mito ${barrnap_gff[3]} --path_barrnap_sum $barrnap_summary" + def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_barrnap_sum $barrnap_summary" + barrnap += filter_ssu_stats ? " --filter_ssu_stats $filter_ssu_stats --filter_ssu_asv $filter_ssu_asv --filter_ssu $params.filter_ssu" : " --filter_ssu none" def filter_len_asv = filter_len_asv_stats ? "--filter_len_asv $filter_len_asv_stats --filter_len_asv_len_orig $filter_len_asv_len_orig" : "" filter_len_asv += params.min_len_asv ? " --min_len_asv $params.min_len_asv " : " --min_len_asv 0" filter_len_asv += params.max_len_asv ? " --max_len_asv $params.max_len_asv" : " --max_len_asv 0" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 1741c481..3d390697 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -680,8 +680,9 @@ workflow AMPLISEQ { DADA2_MERGE.out.fasta, DADA2_MERGE.out.dada2asv, DADA2_MERGE.out.dada2stats, - !params.skip_barrnap ? BARRNAP.out.gff.collect(sort: true) : [], !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], + params.filter_ssu ? FILTER_SSU.out.stats : [], + params.filter_ssu ? FILTER_SSU.out.asv : [], params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats : [], params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig : [], params.filter_codons ? 
FILTER_CODONS.out.stats : [], From bd7de174b2bb9b6b8affacdd3fb9457367fc3e24 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 27 Jun 2023 13:26:11 +0200 Subject: [PATCH 040/230] fix with --skip_multiqc --- modules/local/summary_report.nf | 2 +- workflows/ampliseq.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index e7ae9ae7..4967b496 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -46,7 +46,7 @@ process SUMMARY_REPORT { script: def single_end = meta.single_end ? "--single_end" : "" - def fastqc = params.skip_fastqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" + def fastqc = params.skip_fastqc || params.skip_multiqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : params.retain_untrimmed ? "--retain_untrimmed --ca_sum_path $ca_summary" : "--ca_sum_path $ca_summary" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3d390697..ae6d456e 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -669,7 +669,7 @@ workflow AMPLISEQ { SUMMARY_REPORT ( Channel.fromPath("${baseDir}/assets/report_template.Rmd"), Channel.fromPath("${baseDir}/assets/report_styles.css"), - MULTIQC.out.plots, //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") + !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? 
CUTADAPT_WORKFLOW.out.summary.collect() : [], find_truncation_values, DADA2_PREPROCESSING.out.args.first(), From 6027a7917ee37b923574ea3110a261128b4c87e9 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Tue, 27 Jun 2023 14:41:27 +0200 Subject: [PATCH 041/230] Update sbdi-export for PR2 version 5 --- bin/sbdiexport.R | 8 +++++--- bin/sbdiexportreannotate.R | 8 +++++--- lib/WorkflowAmpliseq.groovy | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/bin/sbdiexport.R b/bin/sbdiexport.R index e0a3575d..c80faec6 100755 --- a/bin/sbdiexport.R +++ b/bin/sbdiexport.R @@ -44,8 +44,10 @@ n_samples <- length(colnames(asvs)) - 1 # Read taxonomy table and make sure all expected columns are there taxonomy <- read.delim(taxtable, sep = '\t', stringsAsFactors = FALSE) %>% mutate(Domain = if("Domain" %in% colnames(.)) Domain else '') %>% - mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom else '') %>% - mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum else '') %>% + mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom + else if ("Supergroup" %in% colnames(.)) Supergroup else '') %>% + mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum + else if ("Division" %in% colnames(.)) Division else '') %>% mutate(Class = if("Class" %in% colnames(.)) Class else '') %>% mutate(Order = if("Order" %in% colnames(.)) Order else '') %>% mutate(Family = if("Family" %in% colnames(.)) Family else '') %>% @@ -160,5 +162,5 @@ asvtax <- asvs %>% ) %>% relocate(otu, .after = infraspecificEpithet) %>% relocate(associatedSequences, .before = domain) %>% - select_if(!names(.) %in% c('confidence','domain', 'Species_exact', 'SH', 'BOLD_bin')) %>% + select_if(!names(.) 
%in% c('confidence','domain', 'Species_exact', 'SH', 'BOLD_bin', 'Supergroup', 'Division', 'Subdivision')) %>% write_tsv("asv-table.tsv", na = '') diff --git a/bin/sbdiexportreannotate.R b/bin/sbdiexportreannotate.R index ccbc958a..130fe48c 100755 --- a/bin/sbdiexportreannotate.R +++ b/bin/sbdiexportreannotate.R @@ -47,8 +47,10 @@ predictions <- data.frame( taxtable <- taxonomy %>% inner_join(predictions, by = 'ASV_ID') %>% mutate(Domain = if("Domain" %in% colnames(.)) Domain else '') %>% - mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom else '') %>% - mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum else '') %>% + mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom + else if ("Supergroup" %in% colnames(.)) Supergroup else '') %>% + mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum + else if ("Division" %in% colnames(.)) Division else '') %>% mutate(Class = if("Class" %in% colnames(.)) Class else '') %>% mutate(Order = if("Order" %in% colnames(.)) Order else '') %>% mutate(Family = if("Family" %in% colnames(.)) Family else '') %>% @@ -122,5 +124,5 @@ taxtable <- taxonomy %>% relocate(infraspecificEpithet, .after = specificEpithet) %>% relocate(annotation_confidence, .after = otu) %>% relocate(date_identified:taxon_remarks, .after = annotation_confidence) %>% - select_if(!names(.) %in% c('domain', 'species_exact', 'SH', 'BOLD_bin')) %>% + select_if(!names(.) 
%in% c('domain', 'species_exact', 'SH', 'BOLD_bin', 'Supergroup', 'Division', 'Subdivision')) %>% write_tsv("annotation.tsv", na = '') diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 47454fd0..eaf933a9 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -88,7 +88,7 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--filter_ssu` cannot be used with `--skip_barrnap` because filtering for SSU's depends on barrnap.") } - String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] + String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] if ( params.sbdiexport && !Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.dada_ref_taxonomy.toString().equals(entry)) ) { Nextflow.error("Incompatible parameters: `--sbdiexport` does not work with the chosen database of `--dada_ref_taxonomy`, because the expected taxonomic levels do not match.") } From c993c36f07235a5bd46acd5bcba8af985bb19b3a Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 27 Jun 2023 14:59:53 +0200 Subject: [PATCH 042/230] add ITSx report --- assets/report_template.Rmd | 66 ++++++++++++++++++++++++++++++++- bin/generate_report.R | 5 +++ modules/local/summary_report.nf | 3 ++ workflows/ampliseq.nf | 1 + 4 files changed, 73 
insertions(+), 2 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 2678089e..2854dd42 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -34,6 +34,7 @@ params: filter_ssu: "" min_len_asv: "" max_len_asv: "" + cut_its: "" # file paths mqc_plot: "" @@ -56,6 +57,7 @@ params: filter_len_asv_len_orig: "" filter_codons: "" stop_codons: "" + itsx_cutasv_summary: "" ref_tax_path: "" dada2_taxonomy: "" sintax_taxonomy: "" @@ -140,6 +142,8 @@ datatable(cutadapt_summary, options = list( # xlab("Samples") + # coord_flip() + # theme_bw() + +cat("\n\nCutadapt results can be found in folder [cutadapt](../cutadapt).") ``` ## Quality filtering using DADA2 @@ -160,7 +164,7 @@ if (params$trunc_qmin != -1) { "forward reads at ", tr_len_f, " bp and reverse ", "reads at ", tr_len_r, " bp, reads shorter than this are discarded. ", sep = "") } else if (params$trunclenf == "null" && params$trunclenr == "null") { - cat("Reads were not trimmed.") + cat("Reads were not trimmed. 
") } else if (params$trunclenf != 0 && params$trunclenr != 0) { cat("Forward reads were trimmed at ", params$trunclenf, " bp and reverse reads were trimmed at ", params$trunclenr, @@ -399,6 +403,8 @@ ggplot(barrnap_df_sum, xlab("rRNA origins") + coord_flip() + theme_bw() + +cat("\n\nrRNA filter results can be found in folder [barrnap](../barrnap).") ``` ```{r, results='asis'} @@ -406,7 +412,7 @@ flag_filter_ssu <- !params$flag_skip_barrnap && isTRUE(params$filter_ssu != "non ``` ```{r, eval = flag_filter_ssu, results='asis'} -cat("ASVs were filtered for (",params$filter_ssu,") using the above classification.", +cat("\n\nASVs were filtered for (",params$filter_ssu,") using the above classification.", "The following table shows read counts for each sample before and after filtering:\n\n", sep = "") # Read the barrnap stats file @@ -494,6 +500,8 @@ datatable(filter_len_stats, options = list( cat("In average", filter_len_stats_avg_removed, "% reads were removed, but at most",filter_len_stats_max_removed,"% reads per sample. 
") cat("The number of ASVs was reduced by",sum(filter_len_profile$Counts)-sum(filter_len_asv_filtered$Counts),"(",100-round( sum(filter_len_asv_filtered$Counts)/sum(filter_len_profile$Counts)*100 ,2),"%), from",sum(filter_len_profile$Counts),"to",sum(filter_len_asv_filtered$Counts)," ASVs.") + +cat("\n\nLength filter results can be found in folder [asv_length_filter](../asv_length_filter).") ``` ```{r, results='asis'} @@ -519,6 +527,8 @@ datatable(filter_codons_stats, options = list( paging = FALSE)) #TODO: add ASV count after filtering + +cat("\n\nCodon usage filter results can be found in folder [codon_filter](../codon_filter).") ``` ```{r, results='asis'} @@ -531,6 +541,58 @@ any_taxonomy <- params$flag_dada2_taxonomy || params$flag_qiime2_taxonomy || par cat("# Taxonomic Classification\n") ``` +```{r, results='asis'} +# Check if ITSX was used +flag_itsx_cutasv <- isTRUE(params$cut_its != "none") +``` + +```{r, eval = flag_itsx_cutasv, results='asis'} +cat("## ITS region\n") +cat("The ITS region was extracted from each ASV sequence using ITSx.", + "Taxonomic classification should have improved performance based on extracted ITS sequence.\n") +cat("The extracted ITS region is",params$cut_its,"sequence. 
") + +# Read ITSX summary +itsx_summary <- readLines(params$itsx_cutasv_summary) + +origins = FALSE +itsx_origins <- data.frame(origin=character(), count=numeric(), stringsAsFactors=FALSE) +for (line in itsx_summary){ + # get basic statistic + if (grepl("Number of sequences in input file:", line)) { + itsx_summary_nasv <- as.numeric( sub("Number of sequences in input file: *\t*", "", line) ) + } + if (grepl("Sequences detected as ITS by ITSx:", line)) { + itsx_summary_its <- as.numeric( sub("Sequences detected as ITS by ITSx: *\t*", "", line) ) + } + # get preliminar origins + if (grepl("----------------------------", line)) { + origins = FALSE + } + if (isTRUE(origins)) { + add <- data.frame(origin=sub(":.*", "", line), count=as.numeric( sub(".*: *\t*", "", line) ) ) + itsx_origins <- rbind(itsx_origins, add) + } + if (grepl("ITS sequences by preliminary origin:", line)) { + origins = TRUE + } +} +itsx_origins$percent <- round( itsx_origins$count / itsx_summary_nasv * 100, 2) + +cat(itsx_summary_its, "of",itsx_summary_nasv,"(",round( itsx_summary_its/itsx_summary_nasv*100 ,2),"%) ASVs were identified as ITS.", + "The following plot shows ITS sequences by preliminary origin:") + +ggplot(itsx_origins, + aes(x = origin, y = percent)) + + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + + ylab("%") + + xlab("ITS sequences by preliminary origin") + + coord_flip() + + theme_bw() + +cat("\n\nITSx results can be found in folder [itsx](../itsx).") +``` + ```{r, eval = params$flag_dada2_taxonomy, results='asis'} cat("## Taxonomic Classification using DADA2\n") diff --git a/bin/generate_report.R b/bin/generate_report.R index 69b7cdb6..9cf5dbc7 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -43,6 +43,8 @@ option_list = list( make_option(c("--filter_codons"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--stop_codons"), type="character", default=NULL, help="MultiQC plots", 
metavar="character"), make_option(c("--filter_len_asv_len_orig"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--itsx_cutasv_summary"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--cut_its"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--ref_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), @@ -95,6 +97,9 @@ rmarkdown::render(opt$report, output_file = opt$output, filter_len_asv_len_orig = opt$filter_len_asv_len_orig, filter_codons = opt$filter_codons, stop_codons = opt$stop_codons, + itsx_cutasv_summary = opt$itsx_cutasv_summary, + cut_its = opt$cut_its, + cut_its = opt$cut_its, ref_tax_path = opt$ref_tax_path, dada2_taxonomy = opt$dada2_taxonomy, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 4967b496..b58cea47 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -30,6 +30,7 @@ process SUMMARY_REPORT { path(filter_len_asv_stats) path(filter_len_asv_len_orig) path(filter_codons_stats) + path(itsx_cutasv_summary) path(dada2_tax_reference) path(dada2_tax) path(sintax_tax) @@ -62,6 +63,7 @@ process SUMMARY_REPORT { filter_len_asv += params.min_len_asv ? " --min_len_asv $params.min_len_asv " : " --min_len_asv 0" filter_len_asv += params.max_len_asv ? " --max_len_asv $params.max_len_asv" : " --max_len_asv 0" def filter_codons = filter_codons_stats ? "--filter_codons $filter_codons_stats --stop_codons $params.stop_codons" : "" + def itsx_cutasv = itsx_cutasv_summary ? 
"--itsx_cutasv_summary $itsx_cutasv_summary --cut_its $params.cut_its" : "--cut_its none" def dada2_taxonomy = !dada2_tax ? "" : params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax" : "" @@ -87,6 +89,7 @@ process SUMMARY_REPORT { --max_ee $params.max_ee \\ $filter_len_asv \\ $filter_codons \\ + $itsx_cutasv \\ $dada2_taxonomy \\ $sintax_taxonomy \\ $pplace_taxonomy \\ diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index ae6d456e..2796b554 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -686,6 +686,7 @@ workflow AMPLISEQ { params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats : [], params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig : [], params.filter_codons ? FILTER_CODONS.out.stats : [], + params.cut_its != "none" ? ITSX_CUTASV.out.summary : [], !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy && !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [], !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], !params.skip_taxonomy && params.sintax_ref_taxonomy ? 
ch_sintax_tax : [], From 49cd0a497d63dece8fcf7ae7b3134da98973865e Mon Sep 17 00:00:00 2001 From: jtangrot Date: Wed, 28 Jun 2023 08:44:20 +0200 Subject: [PATCH 043/230] Update requirements for sbdiexpoort --- lib/WorkflowAmpliseq.groovy | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 47454fd0..55e0fd66 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -60,11 +60,11 @@ class WorkflowAmpliseq { } } - if (params.dada_assign_taxlevels && params.sbdiexport) { + if (params.dada_assign_taxlevels && params.sbdiexport && !params.sintax_ref_taxonomy) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects specific taxonomics ranks (default) and therefore excludes modifying those using `--dada_assign_taxlevels`.") } - if (params.skip_dada_addspecies && params.sbdiexport) { + if (params.skip_dada_addspecies && params.sbdiexport && !params.sintax_ref_taxonomy) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects species annotation and therefore excludes `--skip_dada_addspecies`.") } @@ -89,8 +89,8 @@ class WorkflowAmpliseq { } String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] - if ( params.sbdiexport && !Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.dada_ref_taxonomy.toString().equals(entry)) ) { - Nextflow.error("Incompatible parameters: `--sbdiexport` does not work with the chosen database of `--dada_ref_taxonomy`, because the expected taxonomic levels do not match.") + if (params.sbdiexport && (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> 
params.dada_ref_taxonomy.toString().equals(entry)) || (params.sintax_ref_taxonomy && !Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) )) { + Nextflow.error("Incompatible parameters: `--sbdiexport` does not work with the chosen database of `--dada_ref_taxonomy`/`--sintax_ref_taxonomy` because the expected taxonomic levels do not match.") } if (params.addsh && !params.dada_ref_databases[params.dada_ref_taxonomy]["shfile"]) { From 5d2d6b244f4feb12473b38eaf847ec946aa58806 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 28 Jun 2023 15:40:14 +0200 Subject: [PATCH 044/230] fix stacked barplot of fractions lost per DADA2 step --- assets/report_template.Rmd | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 2854dd42..e06a4737 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -260,8 +260,6 @@ Samples with unusual low reads numbers relative to the number of expected ASVs ( should be treated cautiously, because the abundance estimate will be very granular and might vary strongly between (theoretical) replicates due to high impact of stochasticity. 
-Stacked barcharts of read numbers per sample and processing stage (see above): - ```{r, results='asis'} # Stacked barchart to num of reads @@ -269,13 +267,15 @@ Stacked barcharts of read numbers per sample and processing stage (see above): if ( params$flag_single_end ) { # single end + cat("Stacked barcharts of read numbers per sample and processing stage (see above):\n\n") + dada_stats_ex <- data.frame(sample = dada_stats$sample, input = dada_stats$DADA2_input, filtered = dada_stats$DADA2_input-dada_stats$filtered, denoised = dada_stats$filtered-dada_stats$denoised, nonchim = dada_stats$denoised-dada_stats$nonchim, analysis = dada_stats$nonchim) - dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:6]/dada_stats_ex$input, 2)) + dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:6]/dada_stats_ex$input*100, 2)) n_samples <- length(dada_stats_p$sample) # Stack columns for both stacked barcharts samples_t <- c(rep(dada_stats_p$sample, 4)) @@ -289,15 +289,17 @@ if ( params$flag_single_end ) { dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) } else { # paired end + cat("Stacked barcharts of read pair numbers (denoisedF & denoisedR halfed, because each pair is split) per sample and processing stage (see above):\n\n") + dada_stats_ex <- data.frame(sample = dada_stats$sample, DADA2_input = dada_stats$DADA2_input, filtered = dada_stats$DADA2_input-dada_stats$filtered, - denoisedF = dada_stats$filtered-dada_stats$denoisedF, - denoisedR = dada_stats$denoisedF-dada_stats$denoisedR, - merged = dada_stats$denoisedR-dada_stats$merged, + denoisedF = (dada_stats$filtered-dada_stats$denoisedF)/2, + denoisedR = (dada_stats$filtered-dada_stats$denoisedR)/2, + merged = (dada_stats$denoisedF+dada_stats$denoisedR)/2-dada_stats$merged, nonchim = dada_stats$merged-dada_stats$nonchim, analysis = dada_stats$nonchim) - dada_stats_p <- data.frame(sample = dada_stats_ex$sample, 
round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input, 2)) + dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input*100, 2)) # Stack columns for both stacked barcharts n_samples <- length(dada_stats_p$sample) samples_t <- c(rep(dada_stats_p$sample, 6)) @@ -326,7 +328,7 @@ dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stat ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + geom_bar(position = "fill", stat = "identity") + xlab("Samples") + - ylab("% of total reads") + + ylab("Fraction of total reads") + coord_flip() + scale_fill_brewer("Filtering Steps", palette = "Spectral") ``` From 04b43c11ef893273ee91eb2dd75dbe1b22fdd0f4 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 28 Jun 2023 16:34:57 +0200 Subject: [PATCH 045/230] text changes --- assets/report_template.Rmd | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index e06a4737..3033522d 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -162,19 +162,21 @@ if (params$trunc_qmin != -1) { "below ", params$trunc_qmin, " and at least ",params$trunc_rmin*100, "% of reads are retained, resulting in a trim of ", "forward reads at ", tr_len_f, " bp and reverse ", - "reads at ", tr_len_r, " bp, reads shorter than this are discarded. ", sep = "") + "reads at ", tr_len_r, " bp, reads shorter than this were discarded. ", sep = "") } else if (params$trunclenf == "null" && params$trunclenr == "null") { cat("Reads were not trimmed. ") } else if (params$trunclenf != 0 && params$trunclenr != 0) { cat("Forward reads were trimmed at ", params$trunclenf, " bp and reverse reads were trimmed at ", params$trunclenr, - " bp, reads shorter than this are discarded. ", sep = "") + " bp, reads shorter than this were discarded. 
", sep = "") } else if (params$trunclenf != 0) { - cat("Forward reads were trimmed at ", params$trunclenf," bp, reads shorter than this are discarded. ", sep = "") + cat("Forward reads were trimmed at ", params$trunclenf," bp, reads shorter than this were discarded. ", sep = "") } else if (params$trunclenr != 0) { - cat("Reverse reads were trimmed at ", params$trunclenr," bp, reads shorter than this are discarded. ", sep = "") + cat("Reverse reads were trimmed at ", params$trunclenr," bp, reads shorter than this were discarded. ", sep = "") } -cat("Reads with more than", params$max_ee,"expected errors were discarded.", sep = " ") +cat("Reads with more than", params$max_ee,"expected errors were discarded.", + "Read counts passing the filter are shown in section ['Read counts per sample'](#read-counts-per-sample)", + "column 'filtered'.", sep = " ") ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} From 07a32c0f63e31af316601785dd875aa26b983c2c Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 29 Jun 2023 09:19:49 +0200 Subject: [PATCH 046/230] Set kingdom to unassigned if no taxonomy --- bin/sbdiexport.R | 2 +- bin/sbdiexportreannotate.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/sbdiexport.R b/bin/sbdiexport.R index c80faec6..7abf01b1 100755 --- a/bin/sbdiexport.R +++ b/bin/sbdiexport.R @@ -152,7 +152,7 @@ asvtax <- asvs %>% mutate( domain = str_remove(domain, 'Reversed:_'), associatedSequences = '', - kingdom = ifelse(is.na(kingdom), 'Unassigned', kingdom), + kingdom = ifelse(is.na(kingdom) | kingdom == '', 'Unassigned', kingdom), specificEpithet = ifelse(!(is.na(Species_exact) | Species_exact == ''), Species_exact, specificEpithet), specificEpithet = ifelse( (!(is.na(genus) | genus == '')), str_replace(specificEpithet, paste('^',genus, '[_[:space:]]' ,sep=''), ''), specificEpithet), specificEpithet = ifelse( str_detect(specificEpithet, '^[sS]p{1,2}.?$'), '', specificEpithet), diff --git a/bin/sbdiexportreannotate.R 
b/bin/sbdiexportreannotate.R index 130fe48c..42f468bd 100755 --- a/bin/sbdiexportreannotate.R +++ b/bin/sbdiexportreannotate.R @@ -117,7 +117,7 @@ taxtable <- taxonomy %>% ), identification_references = 'https://docs.biodiversitydata.se/analyse-data/molecular-tools/#taxonomy-annotation', taxon_remarks = ifelse(!(is.na(domain) | domain == ''), paste('Domain = \'',domain,'\'',sep=''),''), - kingdom = ifelse(is.na(kingdom), 'Unassigned', kingdom) + kingdom = ifelse(is.na(kingdom) | kingdom == '', 'Unassigned', kingdom) ) %>% relocate(asv_sequence, .after = asv_id_alias) %>% relocate(scientificName:taxonRank, .after = asv_sequence) %>% From 657b24147db83d7ea72fa7157d7873a2f32fa249 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 29 Jun 2023 10:28:25 +0200 Subject: [PATCH 047/230] Fix typo --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 2d479272..ceecf26d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -510,7 +510,7 @@ Most of the fields in the template will not be populated by the export process, Output files - `SBDI/` - - `annotation.tsv`: SBDI specific output for taxonomi reannotation, not used in submission to SBDI. + - `annotation.tsv`: SBDI specific output for taxonomic reannotation, not used in submission to SBDI. - `asv-table.tsv`: asv-table tab of template. - `emof.tsv`: emof tab of template. - `event.tsv`: event tab of template. 
From 8ba288d81b4566330473f0e33b0a508925916151 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 29 Jun 2023 10:31:14 +0200 Subject: [PATCH 048/230] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6a26885..c52dae40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#605](https://github.com/nf-core/ampliseq/pull/605) - Make `--sbdiexport` compatible with PR2 version 5.0.0 + ### `Dependencies` ### `Removed` From e8ef9cb7da20c1038717e6024e1d5d3c19fa2e66 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 29 Jun 2023 10:48:53 +0200 Subject: [PATCH 049/230] Fix typo --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index ceecf26d..d3d37beb 100644 --- a/docs/output.md +++ b/docs/output.md @@ -520,7 +520,7 @@ Most of the fields in the template will not be populated by the export process, ## Read count report -This report includes information on how many reads per sample passed each pipeline step in which a loss can occur. Specifically, how many read pairs entered cutadapt, were reverse complemented, passed trimming; how many read pairs entered DADA2, were denoised, merged and non-chimeric; and how many counts were lost during excluding unwanted tax and removing low abundance/prevalence sequences in QIIME2. +This report includes information on how many reads per sample passed each pipeline step in which a loss can occur. Specifically, how many read pairs entered cutadapt, were reverse complemented, passed trimming; how many read pairs entered DADA2, were denoised, merged and non-chimeric; and how many counts were lost during excluding unwanted taxa and removing low abundance/prevalence sequences in QIIME2.
Output files From 9c659000e6d4ce6edb567d0a62909e0e2be71753 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 29 Jun 2023 11:12:42 +0200 Subject: [PATCH 050/230] Fix spaces --- bin/sbdiexport.R | 6 ++---- bin/sbdiexportreannotate.R | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/bin/sbdiexport.R b/bin/sbdiexport.R index 7abf01b1..8885424b 100755 --- a/bin/sbdiexport.R +++ b/bin/sbdiexport.R @@ -44,10 +44,8 @@ n_samples <- length(colnames(asvs)) - 1 # Read taxonomy table and make sure all expected columns are there taxonomy <- read.delim(taxtable, sep = '\t', stringsAsFactors = FALSE) %>% mutate(Domain = if("Domain" %in% colnames(.)) Domain else '') %>% - mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom - else if ("Supergroup" %in% colnames(.)) Supergroup else '') %>% - mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum - else if ("Division" %in% colnames(.)) Division else '') %>% + mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom else if ("Supergroup" %in% colnames(.)) Supergroup else '') %>% + mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum else if ("Division" %in% colnames(.)) Division else '') %>% mutate(Class = if("Class" %in% colnames(.)) Class else '') %>% mutate(Order = if("Order" %in% colnames(.)) Order else '') %>% mutate(Family = if("Family" %in% colnames(.)) Family else '') %>% diff --git a/bin/sbdiexportreannotate.R b/bin/sbdiexportreannotate.R index 42f468bd..19d5e3ae 100755 --- a/bin/sbdiexportreannotate.R +++ b/bin/sbdiexportreannotate.R @@ -47,10 +47,8 @@ predictions <- data.frame( taxtable <- taxonomy %>% inner_join(predictions, by = 'ASV_ID') %>% mutate(Domain = if("Domain" %in% colnames(.)) Domain else '') %>% - mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom - else if ("Supergroup" %in% colnames(.)) Supergroup else '') %>% - mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum - else if ("Division" %in% colnames(.)) Division else '') %>% + mutate(Kingdom = if("Kingdom" %in% colnames(.)) 
Kingdom else if ("Supergroup" %in% colnames(.)) Supergroup else '') %>% + mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum else if ("Division" %in% colnames(.)) Division else '') %>% mutate(Class = if("Class" %in% colnames(.)) Class else '') %>% mutate(Order = if("Order" %in% colnames(.)) Order else '') %>% mutate(Family = if("Family" %in% colnames(.)) Family else '') %>% From 4771844d06ed4ad65e4780637d87a64e319ab044 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 29 Jun 2023 13:25:37 +0200 Subject: [PATCH 051/230] display error profiles of multiple runs --- assets/report_template.Rmd | 27 ++++++++++++++++++++++++--- bin/generate_report.R | 8 ++++---- modules/local/summary_report.nf | 7 ++++++- workflows/ampliseq.nf | 15 ++++++++++++++- 4 files changed, 48 insertions(+), 9 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 3033522d..432262e3 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -44,8 +44,8 @@ params: dada_qc_r_path: "" dada_pp_qc_f_path: "" dada_pp_qc_r_path: "" - dada_1_err_path: "" - dada_2_err_path: "" + dada_err_path: "" + dada_err_run: "" asv_table_path: "" path_asv_fa: "" path_dada2_tab: "" @@ -225,8 +225,29 @@ read pair merging (for paired end Illumina reads only) and PCR chimera removal. Read error correction was performed using estimated error rates, visualized below. +```{r, results='asis'} +# check if single run or multirun +flag_multirun = length ( unlist( strsplit( params$dada_err_run,"," ) ) ) != 1 + +if ( flag_multirun && params$flag_single_end ) { + # single end multi run + cat("Error rates were estimated for each sequencing run separately. ", + "Each 4x4 figure represents one run, in the sequence ", params$dada_err_run,".") +} else if ( flag_multirun && !params$flag_single_end ) { + # paired end multi run + cat("Error rates were estimated for each sequencing run separately. 
", + "Each row represents one run, in the sequence ", params$dada_err_run,".", + "For each row, the error rates for forward reads are at the left side and reverse reads are at the right side.") +} else if ( !flag_multirun && !params$flag_single_end ) { + # paired end single run + cat("Error rates for forward reads are at the left side and reverse reads are at the right side.") + +} +``` + ```{r, out.width="49%", fig.show='hold', fig.align='default'} -knitr::include_graphics(c(params$dada_1_err_path, params$dada_2_err_path)) +dada_err_path <- unlist( strsplit( params$dada_err_path,"," ) ) +knitr::include_graphics(dada_err_path) ``` Estimated error rates for each possible transition. The black line shows the estimated error rates after diff --git a/bin/generate_report.R b/bin/generate_report.R index 9cf5dbc7..ee840da2 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -27,8 +27,8 @@ option_list = list( make_option(c("--dada_qc_r_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada_pp_qc_f_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada_pp_qc_r_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_1_err_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_2_err_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_err_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_err_run"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--asv_table_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_asv_fa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_dada2_tab"), type="character", 
default=NULL, help="MultiQC plots", metavar="character"), @@ -81,8 +81,8 @@ rmarkdown::render(opt$report, output_file = opt$output, dada_qc_r_path = opt$dada_qc_r_path, dada_pp_qc_f_path = opt$dada_pp_qc_f_path, dada_pp_qc_r_path = opt$dada_pp_qc_r_path, - dada_1_err_path = opt$dada_1_err_path, - dada_2_err_path = opt$dada_2_err_path, + dada_err_path = opt$dada_err_path, + dada_err_run = opt$dada_err_run, asv_table_path = opt$asv_table_path, path_asv_fa = opt$path_asv_fa, path_dada2_tab = opt$path_dada2_tab, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index b58cea47..9c72edc8 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -56,7 +56,12 @@ process SUMMARY_REPORT { meta.single_end ? "--dada_qc_f_path $dada_qual_stats --dada_pp_qc_f_path $dada_pp_qual_stats" : "--dada_qc_f_path 'FW_qual_stats.svg' --dada_qc_r_path 'RV_qual_stats.svg' --dada_pp_qc_f_path 'FW_preprocessed_qual_stats.svg' --dada_pp_qc_r_path 'RV_preprocessed_qual_stats.svg'" def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin --trunc_rmin $params.trunc_rmin" : "" - def dada_err = meta.single_end ? "--dada_1_err_path $dada_err_svgs" : "--dada_1_err_path ${dada_err_svgs[0]} --dada_2_err_path ${dada_err_svgs[1]}" + // make comma separated list of error profile path when multiple sequencing runs were performed + if ( meta.run.size() == 1 && meta.single_end ) { + dada_err = "--dada_err_path $dada_err_svgs --dada_err_run " + meta.run + } else { + dada_err = "--dada_err_path " + dada_err_svgs.join(',') + " --dada_err_run " + meta.run.join(',') + } def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_barrnap_sum $barrnap_summary" barrnap += filter_ssu_stats ? " --filter_ssu_stats $filter_ssu_stats --filter_ssu_asv $filter_ssu_asv --filter_ssu $params.filter_ssu" : " --filter_ssu none" def filter_len_asv = filter_len_asv_stats ? 
"--filter_len_asv $filter_len_asv_stats --filter_len_asv_len_orig $filter_len_asv_len_orig" : "" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 2796b554..36b33a75 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -675,7 +675,20 @@ workflow AMPLISEQ { DADA2_PREPROCESSING.out.args.first(), !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg : [], !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg_preprocessed : [], - DADA2_ERR.out.svg, + DADA2_ERR.out.svg + .map { + meta_old, svgs -> + def meta = [:] + meta.single_end = meta_old.single_end + [ meta, svgs, meta_old.run ] } + .groupTuple(by: 0 ) + .map { + meta_old, svgs, runs -> + def meta = [:] + meta.single_end = meta_old.single_end + meta.run = runs.flatten() + [ meta, svgs.flatten() ] + }, DADA2_MERGE.out.asv, DADA2_MERGE.out.fasta, DADA2_MERGE.out.dada2asv, From 958a52033a494428c6f5aea83559eba6130f9baf Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 29 Jun 2023 14:58:38 +0200 Subject: [PATCH 052/230] Add check for valid sintax_ref_taxonomy --- lib/WorkflowMain.groovy | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 3cd7c5fd..ce4333e5 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -57,9 +57,12 @@ class WorkflowMain { } // Check that keys for reference databases are valid - if (params.dada_ref_taxonomy && !params.skip_taxonomy) { + if (params.dada_ref_taxonomy && !params.skip_taxonomy && !params.skip_dada_taxonomy) { dadareftaxonomyExistsError(params, log) } + if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { + sintaxreftaxonomyExistsError(params, log) + } if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { qiimereftaxonomyExistsError(params, log) } @@ -97,7 +100,7 @@ class WorkflowMain { private static void dadareftaxonomyExistsError(params, log) { if (params.dada_ref_databases && params.dada_ref_taxonomy && 
!params.dada_ref_databases.containsKey(params.dada_ref_taxonomy)) { def error_string = "=============================================================================\n" + - " DADA2 reference database '${params.dada_ref_taxonomy}' not found in any config files provided to the pipeline.\n" + + " DADA2 reference database '${params.dada_ref_taxonomy}' not found in any config file provided to the pipeline.\n" + " Currently, the available reference taxonomy keys for `--dada_ref_taxonomy` are:\n" + " ${params.dada_ref_databases.keySet().join(", ")}\n" + "===================================================================================" @@ -105,12 +108,25 @@ class WorkflowMain { } } // + // Exit pipeline if incorrect --sintax_ref_taxonomy key provided + // + private static void sintaxreftaxonomyExistsError(params, log) { + if (params.sintax_ref_databases && params.sintax_ref_taxonomy && !params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy)) { + def error_string = "=============================================================================\n" + + " SINTAX reference database '${params.sintax_ref_taxonomy}' not found in any config file provided to the pipeline.\n" + + " Currently, the available reference taxonomy keys for `--sintax_ref_taxonomy` are:\n" + + " ${params.sintax_ref_databases.keySet().join(", ")}\n" + + "===================================================================================" + Nextflow.error(error_string) + } + } + // // Exit pipeline if incorrect --qiime_ref_taxonomy key provided // private static void qiimereftaxonomyExistsError(params, log) { if (params.qiime_ref_databases && params.qiime_ref_taxonomy && !params.qiime_ref_databases.containsKey(params.qiime_ref_taxonomy)) { def error_string = "=============================================================================\n" + - " QIIME2 reference database '${params.qiime_ref_taxonomy}' not found in any config files provided to the pipeline.\n" + + " QIIME2 reference database 
'${params.qiime_ref_taxonomy}' not found in any config file provided to the pipeline.\n" + " Currently, the available reference taxonomy keys for `--qiime_ref_taxonomy` are:\n" + " ${params.qiime_ref_databases.keySet().join(", ")}\n" + "===================================================================================" From 8b95cf224219676e4e452bf1696fa3a60ac5f20f Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 29 Jun 2023 16:00:48 +0200 Subject: [PATCH 053/230] add QIIME2 downstream analysis --- assets/report_template.Rmd | 100 ++++++++++++++++++++++++++++++-- bin/generate_report.R | 18 +++++- modules/local/summary_report.nf | 18 +++++- workflows/ampliseq.nf | 10 +++- 4 files changed, 138 insertions(+), 8 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 432262e3..24ae4cb6 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -26,6 +26,12 @@ params: flag_qiime2_taxonomy: FALSE flag_sintax_taxonomy: FALSE flag_pplace_taxonomy: FALSE + flag_skip_qiime: FALSE + flag_skip_barplot: FALSE + flag_skip_abundance_tables: FALSE + flag_skip_alpha_rarefaction: FALSE + flag_skip_diversity_indices: FALSE + flag_skip_ancom: FALSE trunclenf: "" trunclenr: "" max_ee: "" @@ -63,6 +69,7 @@ params: sintax_taxonomy: "" pplace_taxonomy: "" qiime2_taxonomy: "" + picrust_pathways: "" --- @@ -619,7 +626,7 @@ cat("\n\nITSx results can be found in folder [itsx](../itsx).") ``` ```{r, eval = params$flag_dada2_taxonomy, results='asis'} -cat("## Taxonomic Classification using DADA2\n") +cat("## DADA2\n") if (!params$flag_ref_tax_user) { ref_tax <- readLines(params$ref_tax_path) @@ -687,11 +694,13 @@ ggplot(asv_classi_df, xlab("Levels") + coord_flip() + theme_bw() + +cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in files 'ASV_tax_*.tsv'.") ``` ```{r, eval = params$flag_qiime2_taxonomy, results='asis'} # Header -cat("## Taxonomic Classification using QIIME2\n") +cat("## QIIME2\n") #TODO: add database 
information #TODO: only tested for greengenes85, need to test also UNITE and SILVA! @@ -747,11 +756,13 @@ ggplot(asv_classi_df, xlab("Levels") + coord_flip() + theme_bw() + +cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](../qiime2/taxonomy).") ``` ```{r, eval = params$flag_sintax_taxonomy, results='asis'} # Header -cat("## Taxonomic Classification using SINTAX\n") +cat("## SINTAX\n") #TODO: add database information @@ -802,11 +813,13 @@ ggplot(asv_classi_df, xlab("Levels") + coord_flip() + theme_bw() + +cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax).") ``` ```{r, eval = params$flag_pplace_taxonomy, results='asis'} # Header -cat("## Taxonomic Classification using Phylogenetic Placement\n") +cat("## Phylogenetic Placement\n") #TODO: add database information @@ -852,4 +865,83 @@ ggplot(asv_classi_df, xlab("Taxonomic levels") + coord_flip() + theme_bw() + +#TODO: *.heattree.tree.svg could be displayed as well! + +cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [pplace](../pplace) in file '*.taxonomy.per_query_unique.tsv'.") +``` + +```{r, eval = !params$flag_skip_qiime, results='asis'} +# Header +cat("# Downstream analysis with QIIME2\n") + +#TODO: report filtering & qiime2 input files in qiime2/input/ +``` + +```{r, eval = !params$flag_skip_abundance_tables, results='asis'} +cat("## Abundance tables\n", + "The abundance tables are the final data for further downstream analysis and visualisations. The tables are based on the computed ASVs and taxonomic classification, but after removal of unwanted taxa. ", + "Folder [qiime2/abundance_tables](../qiime2/abundance_tables) contains tap-separated files (.tsv) that can be opened by any spreadsheet software.") + +cat("\n\n## Relative abundance tables\n", + "Absolute abundance tables produced by the previous steps contain count data, but the compositional nature of 16S rRNA amplicon sequencing requires sequencing depth normalisation. 
This step computes relative abundance tables using TSS (Total Sum Scaling normalisation) for various taxonomic levels and detailed tables for all ASVs with taxonomic classification, sequence and relative abundance for each sample. Typically used for in depth investigation of taxa abundances. ", + "Folder [qiime2/rel_abundance_tables](../qiime2/rel_abundance_tables) contains tap-separated files (.tsv) that can be opened by any spreadsheet software.") +``` + +```{r, eval = !params$flag_skip_barplot, results='asis'} +cat("## Barplot\n", + "Interactive abundance plot that aids exploratory browsing the discovered taxa and their abundance", + "in samples and allows sorting for associated meta data.", + "Folder [qiime2/barplot](../qiime2/barplot) contains barplots, click [qiime2/barplot/index.html](../qiime2/barplot/index.html) to open it in your web browser.") +``` + +```{r, eval = !params$flag_skip_alpha_rarefaction, results='asis'} +cat("## Alpha diversity rarefaction curves\n", + "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not becomes horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ", + "Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click [qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser.") +#TODO: highlight DADA2's pooling vs independent +``` + +```{r, eval = !params$flag_skip_diversity_indices, results='asis'} +cat("## Diversity analysis\n", + "Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity). To do so, sample data is first rarefied to the minimum number of counts per sample. 
", + "\n### Alpha diversity indices\n", + "Alpha diversity measures the species diversity within samples. Diversity calculations are based on sub-sampled data rarefied to the minimum read count of all samples. This step calculates alpha diversity using various methods and performs pairwise comparisons of groups of samples. It is based on a phylogenetic tree of all ASV sequences. ", + "Folder [qiime2/diversity/alpha_diversity](../qiime2/diversity/alpha_diversity) contains the alpha-diverity data:\n", + "- Shannon’s diversity index (quantitative): [qiime2/diversity/alpha_diversity/shannon_vector/index.html](../qiime2/diversity/alpha_diversity/shannon_vector/index.html)\n", + "- Pielou’s Evenness: [qiime2/diversity/alpha_diversity/evenness_vector/index.html](../qiime2/diversity/alpha_diversity/evenness_vector/index.html)\n", + "- Faith’s Phylogenetic Diversity (qualitiative, phylogenetic) [qiime2/diversity/alpha_diversity/faith_pd_vector/index.html](../qiime2/diversity/alpha_diversity/faith_pd_vector/index.html)\n", + "- Observed OTUs (qualitative): [qiime2/diversity/alpha_diversity/observed_otus_vector/index.html](../qiime2/diversity/alpha_diversity/observed_otus_vector/index.html)\n", + "\n### Beta diversity indices\n", + "Beta diversity measures the species community differences between samples. Diversity calculations are based on sub-sampled data rarefied to the minimum read count of all samples. This step calculates beta diversity distances using various methods and performs pairwise comparisons of groups of samples. Additionally, principle coordinates analysis (PCoA) plots are produced that can be visualized with Emperor in your default browser without the need for installation. This calculations are based on a phylogenetic tree of all ASV sequences. 
", + "Folder [qiime2/diversity/beta_diversity](../qiime2/diversity/beta_diversity) contains the beta-diverity data:\n", + "(1) PCoA for four different beta diversity distances are accessible via:", + "- Bray-Curtis distance (quantitative): [qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html)\n", + "- Jaccard distance (qualitative): [qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html)\n", + "- unweighted UniFrac distance (qualitative, phylogenetic) [qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html)\n", + "- weighted UniFrac distance (quantitative, phylogenetic): [qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html)\n", + "(2) Pairwise comparisons of groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each folder named '{method}_distance_matrix-{treatment}' contains an 'index.html' that allows to view the result of the statistical test for the diversity method between treatments.", + sep = "\n") +#TODO: adonis test is missing here! +#TODO: report rarefaction depth, note phylogenetic tree & phylogenetic placement, highlight DADA2's pooled method +``` + +```{r, eval = !params$flag_skip_ancom, results='asis'} +cat("## ANCOM\n", + "Analysis of Composition of Microbiomes (ANCOM) is applied to identify features that are differentially abundant across sample groups. 
A key assumption made by ANCOM is that few taxa (less than about 25%) will be differentially abundant between groups otherwise the method will be inaccurate.", + "Comparisons between groups of samples is performed for specific metadata that can be found in folder [qiime2/ancom/](../qiime2/ancom/). Each folder named 'Category-{treatment}-{taxonomic level}' contains an 'index.html' that allows to view the result of the statistical test between treatments.", + sep = "\n") +``` + +```{r, results='asis'} +flag_picrust_pathways <- isTRUE(params$picrust_pathways != "") +``` + +```{r, eval = flag_picrust_pathways, results='asis'} +cat("## PICRUSt2\n", + "PICRUSt2 (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences.", + "Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology predictions were made for each sample.", + "In folder [PICRUSt2/](../PICRUSt2/) are predicted quantifications for Enzyme Classification numbers (EC), see 'EC_pred_metagenome_unstrat_descrip.tsv', KEGG orthologs (KO), see 'KO_pred_metagenome_unstrat_descrip.tsv', MetaCyc ontology, see 'METACYC_path_abun_unstrat_descrip.tsv'.", + "Quantifications are not normalized yet, they can be normalized e.g. 
by the total sum per sample.", + sep = "\n") ``` diff --git a/bin/generate_report.R b/bin/generate_report.R index ee840da2..9e1323c0 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -53,7 +53,14 @@ option_list = list( make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_pplace_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), - make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character") + make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--flag_skip_qiime"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--flag_skip_barplot"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--flag_skip_abundance_tables"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--flag_skip_alpha_rarefaction"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--flag_skip_diversity_indices"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--flag_skip_ancom"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--picrust_pathways"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) opt_parser = OptionParser(option_list = option_list) @@ -108,4 +115,11 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_pplace_taxonomy = opt$flag_pplace_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, 
flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, - qiime2_taxonomy = opt$qiime2_taxonomy)) + qiime2_taxonomy = opt$qiime2_taxonomy, + flag_skip_qiime = opt$flag_skip_qiime, + flag_skip_barplot = opt$flag_skip_barplot, + flag_skip_abundance_tables = opt$flag_skip_abundance_tables, + flag_skip_alpha_rarefaction = opt$flag_skip_alpha_rarefaction, + flag_skip_diversity_indices = opt$flag_skip_diversity_indices, + flag_skip_ancom = opt$flag_skip_ancom, + picrust_pathways = opt$picrust_pathways)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 9c72edc8..82df029e 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -36,6 +36,13 @@ process SUMMARY_REPORT { path(sintax_tax) path(pplace_tax) path(qiime2_tax) + val(run_qiime2) + path(barplot) + val(abundance_tables) + val(alpha_rarefaction) + val(diversity_indices) + val(ancom) + path(picrust_pathways) output: @@ -74,6 +81,13 @@ process SUMMARY_REPORT { def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax" : "" def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax" : "" + def qiime2 = run_qiime2 ? "" : "--flag_skip_qiime" + qiime2 += barplot ? "" : " --flag_skip_barplot" + qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" + qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" + qiime2 += diversity_indices ? "" : " --flag_skip_diversity_indices" + qiime2 += ancom ? "" : " --flag_skip_ancom" + def picrust = picrust_pathways ? 
"--picrust_pathways $picrust_pathways" : "" """ generate_report.R --report $report_template \\ --output "Summary_Report.html" \\ @@ -98,7 +112,9 @@ process SUMMARY_REPORT { $dada2_taxonomy \\ $sintax_taxonomy \\ $pplace_taxonomy \\ - $qiime2_taxonomy + $qiime2_taxonomy \\ + $qiime2 \\ + $picrust """ //--pl_results $results_dir \\ //cat <<-END_VERSIONS > versions.yml diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 36b33a75..60e7d8ed 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -704,7 +704,15 @@ workflow AMPLISEQ { !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [] + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], + run_qiime2, + run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder : [], + run_qiime2 && !params.skip_abundance_tables ? "done" : "", + run_qiime2 && !params.skip_alpha_rarefaction ? "done" : "", + run_qiime2 && !params.skip_diversity_indices && params.metadata ? "done" : "", + run_qiime2 && !params.skip_ancom && params.metadata ? "done" : "", + params.picrust ? 
PICRUST.out.pathways : [] + // params.qiime_adonis_formula ) From 3430daec1f7c7636c8e00bc2ae714c85b6a1148f Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 30 Jun 2023 11:28:03 +0200 Subject: [PATCH 054/230] add --skip_summary_report, module versions, db versions --- assets/report_template.Rmd | 23 ++----- bin/generate_report.R | 8 ++- conf/modules.config | 5 +- modules/local/summary_report.nf | 25 ++++---- nextflow.config | 1 + nextflow_schema.json | 4 ++ workflows/ampliseq.nf | 103 ++++++++++++++++---------------- 7 files changed, 82 insertions(+), 87 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 24ae4cb6..9701791d 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -41,6 +41,9 @@ params: min_len_asv: "" max_len_asv: "" cut_its: "" + dada2_ref_tax_title: "" + qiime2_ref_tax_title: "" + sintax_ref_tax_title: "" # file paths mqc_plot: "" @@ -64,7 +67,6 @@ params: filter_codons: "" stop_codons: "" itsx_cutasv_summary: "" - ref_tax_path: "" dada2_taxonomy: "" sintax_taxonomy: "" pplace_taxonomy: "" @@ -629,20 +631,9 @@ cat("\n\nITSx results can be found in folder [itsx](../itsx).") cat("## DADA2\n") if (!params$flag_ref_tax_user) { - ref_tax <- readLines(params$ref_tax_path) - - db <- "Unknown DB" - for (line in ref_tax){ - if (grepl("Title:", line)) { - db <- sub(".*Title: ", "", line) - } - } - - # Output text db cat("The taxonomic classification was performed by DADA2 using the database: ", - "\"", db, "\".\n\n", sep = "") + "\"", params$dada2_ref_tax_title, "\".\n\n", sep = "") } else { - # Output text db cat("The taxonomic classification was performed by DADA2 using a custom database ", "provided by the user.\n\n", sep = "") } @@ -702,7 +693,7 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -#TODO: add database information +cat("The taxonomic classification was performed by QIIME2 using the database: \"", params$qiime2_ref_tax_title, 
"\".\n\n", sep = "") #TODO: only tested for greengenes85, need to test also UNITE and SILVA! # Read file and prepare table @@ -764,7 +755,7 @@ cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](.. # Header cat("## SINTAX\n") -#TODO: add database information +cat("The taxonomic classification was performed by SINTAX using the database: \"", params$sintax_ref_tax_title, "\".\n\n", sep = "") asv_tax <- read.table(params$sintax_taxonomy, header = TRUE, sep = "\t") @@ -821,8 +812,6 @@ cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax). # Header cat("## Phylogenetic Placement\n") -#TODO: add database information - # Read file and prepare table asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") diff --git a/bin/generate_report.R b/bin/generate_report.R index 9e1323c0..d39fd72b 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -45,14 +45,16 @@ option_list = list( make_option(c("--filter_len_asv_len_orig"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--itsx_cutasv_summary"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--cut_its"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--ref_tax_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada2_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), + make_option(c("--sintax_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), 
make_option(c("--flag_sintax_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_pplace_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), + make_option(c("--qiime2_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_skip_qiime"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_barplot"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), @@ -107,14 +109,16 @@ rmarkdown::render(opt$report, output_file = opt$output, itsx_cutasv_summary = opt$itsx_cutasv_summary, cut_its = opt$cut_its, cut_its = opt$cut_its, - ref_tax_path = opt$ref_tax_path, + dada2_ref_tax_title = opt$dada2_ref_tax_title, dada2_taxonomy = opt$dada2_taxonomy, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, flag_sintax_taxonomy = opt$flag_sintax_taxonomy, + sintax_ref_tax_title = opt$sintax_ref_tax_title, sintax_taxonomy = opt$sintax_taxonomy, flag_pplace_taxonomy = opt$flag_pplace_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, + qiime2_ref_tax_title = opt$qiime2_ref_tax_title, qiime2_taxonomy = opt$qiime2_taxonomy, flag_skip_qiime = opt$flag_skip_qiime, flag_skip_barplot = opt$flag_skip_barplot, diff --git a/conf/modules.config b/conf/modules.config index 88b7dd8a..74e7afe7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -804,9 +804,8 @@ process { withName: SUMMARY_REPORT { publishDir = [ - path: { 
"${params.outdir}/Summary_Report" }, - mode: params.publish_dir_mode, - pattern: '*.html' + path: { "${params.outdir}/summary_report" }, + mode: params.publish_dir_mode ] } } diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 82df029e..bdca6230 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -31,7 +31,6 @@ process SUMMARY_REPORT { path(filter_len_asv_len_orig) path(filter_codons_stats) path(itsx_cutasv_summary) - path(dada2_tax_reference) path(dada2_tax) path(sintax_tax) path(pplace_tax) @@ -46,8 +45,8 @@ process SUMMARY_REPORT { output: - path "Summary_Report.html" , emit: report - //path "versions.yml" , emit: versions + path "summary_report.html" , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -77,10 +76,10 @@ process SUMMARY_REPORT { def filter_codons = filter_codons_stats ? "--filter_codons $filter_codons_stats --stop_codons $params.stop_codons" : "" def itsx_cutasv = itsx_cutasv_summary ? "--itsx_cutasv_summary $itsx_cutasv_summary --cut_its $params.cut_its" : "--cut_its none" def dada2_taxonomy = !dada2_tax ? "" : - params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_path $dada2_tax_reference" - def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax" : "" + params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --dada2_ref_tax_title '${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'" + def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax --sintax_ref_tax_title '${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "" def pplace_taxonomy = pplace_tax ? 
"--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" - def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax" : "" + def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax --qiime2_ref_tax_title '${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "" def qiime2 = run_qiime2 ? "" : "--flag_skip_qiime" qiime2 += barplot ? "" : " --flag_skip_barplot" qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" @@ -90,7 +89,7 @@ process SUMMARY_REPORT { def picrust = picrust_pathways ? "--picrust_pathways $picrust_pathways" : "" """ generate_report.R --report $report_template \\ - --output "Summary_Report.html" \\ + --output "summary_report.html" \\ $fastqc \\ $cutadapt \\ $dada_quality \\ @@ -115,10 +114,12 @@ process SUMMARY_REPORT { $qiime2_taxonomy \\ $qiime2 \\ $picrust + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + R: \$(R --version 2>&1 | sed -n 1p | sed 's/R version //' | sed 's/ (.*//') + rmarkdown: \$(Rscript -e "cat(paste(packageVersion('rmarkdown'), collapse='.'))") + knitr: \$(Rscript -e "cat(paste(packageVersion('knitr'), collapse='.'))") + END_VERSIONS """ - //--pl_results $results_dir \\ - //cat <<-END_VERSIONS > versions.yml - //"${task.process}": - // R: \$(R --version 2>&1 | sed -n 1p | sed 's/R version //' | sed 's/ (.*//') - //END_VERSIONS } diff --git a/nextflow.config b/nextflow.config index 9f28feed..7e43914f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -86,6 +86,7 @@ params { skip_diversity_indices = false skip_ancom = false skip_multiqc = false + skip_summary_report = false // Database options dada_ref_taxonomy = "silva=138" diff --git a/nextflow_schema.json b/nextflow_schema.json index 7d733cf8..75b4def4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -557,6 +557,10 @@ "skip_multiqc": { "type": "boolean", "description": "Skip MultiQC reporting" + }, + "skip_summary_report": { + "type": "boolean", + 
"description": "Skip Markdown summary report" } } }, diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 60e7d8ed..95c4974f 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -665,59 +665,56 @@ workflow AMPLISEQ { // // MODULE: Summary Report // - - SUMMARY_REPORT ( - Channel.fromPath("${baseDir}/assets/report_template.Rmd"), - Channel.fromPath("${baseDir}/assets/report_styles.css"), - !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") - !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect() : [], - find_truncation_values, - DADA2_PREPROCESSING.out.args.first(), - !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg : [], - !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg_preprocessed : [], - DADA2_ERR.out.svg - .map { - meta_old, svgs -> - def meta = [:] - meta.single_end = meta_old.single_end - [ meta, svgs, meta_old.run ] } - .groupTuple(by: 0 ) - .map { - meta_old, svgs, runs -> - def meta = [:] - meta.single_end = meta_old.single_end - meta.run = runs.flatten() - [ meta, svgs.flatten() ] - }, - DADA2_MERGE.out.asv, - DADA2_MERGE.out.fasta, - DADA2_MERGE.out.dada2asv, - DADA2_MERGE.out.dada2stats, - !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], - params.filter_ssu ? FILTER_SSU.out.stats : [], - params.filter_ssu ? FILTER_SSU.out.asv : [], - params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats : [], - params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig : [], - params.filter_codons ? FILTER_CODONS.out.stats : [], - params.cut_its != "none" ? ITSX_CUTASV.out.summary : [], - !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy && !params.dada_ref_tax_custom ? FORMAT_TAXONOMY.out.ref_tax_info : [], - !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], - !params.skip_taxonomy && params.sintax_ref_taxonomy ? 
ch_sintax_tax : [], - !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], - run_qiime2, - run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder : [], - run_qiime2 && !params.skip_abundance_tables ? "done" : "", - run_qiime2 && !params.skip_alpha_rarefaction ? "done" : "", - run_qiime2 && !params.skip_diversity_indices && params.metadata ? "done" : "", - run_qiime2 && !params.skip_ancom && params.metadata ? "done" : "", - params.picrust ? PICRUST.out.pathways : [] - // params.qiime_adonis_formula - ) - - - // TODO Versions in Report - //ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) + if (!params.skip_summary_report) { + SUMMARY_REPORT ( + Channel.fromPath("${baseDir}/assets/report_template.Rmd"), + Channel.fromPath("${baseDir}/assets/report_styles.css"), + !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") + !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect() : [], + find_truncation_values, + DADA2_PREPROCESSING.out.args.first(), + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg : [], + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg_preprocessed : [], + DADA2_ERR.out.svg + .map { + meta_old, svgs -> + def meta = [:] + meta.single_end = meta_old.single_end + [ meta, svgs, meta_old.run ] } + .groupTuple(by: 0 ) + .map { + meta_old, svgs, runs -> + def meta = [:] + meta.single_end = meta_old.single_end + meta.run = runs.flatten() + [ meta, svgs.flatten() ] + }, + DADA2_MERGE.out.asv, + DADA2_MERGE.out.fasta, + DADA2_MERGE.out.dada2asv, + DADA2_MERGE.out.dada2stats, + !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], + params.filter_ssu ? FILTER_SSU.out.stats : [], + params.filter_ssu ? FILTER_SSU.out.asv : [], + params.min_len_asv || params.max_len_asv ? 
FILTER_LEN_ASV.out.stats : [], + params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig : [], + params.filter_codons ? FILTER_CODONS.out.stats : [], + params.cut_its != "none" ? ITSX_CUTASV.out.summary : [], + !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], + !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax : [], + !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], + run_qiime2, + run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder : [], + run_qiime2 && !params.skip_abundance_tables ? "done" : "", + run_qiime2 && !params.skip_alpha_rarefaction ? "done" : "", + run_qiime2 && !params.skip_diversity_indices && params.metadata ? "done" : "", + run_qiime2 && !params.skip_ancom && params.metadata ? "done" : "", + params.picrust ? PICRUST.out.pathways : [] + // params.qiime_adonis_formula + ) + ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) + } //Save input in results folder input = file(params.input) From b74949a2a89a4fe61b363b36f082db124f2dc17c Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 30 Jun 2023 13:28:05 +0200 Subject: [PATCH 055/230] add qiime2 asv filtering --- assets/report_template.Rmd | 45 ++++++++++++++++++++++++++++++++- bin/generate_report.R | 10 ++++++++ modules/local/summary_report.nf | 3 +++ workflows/ampliseq.nf | 2 ++ 4 files changed, 59 insertions(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 9701791d..fd149098 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -44,6 +44,10 @@ params: dada2_ref_tax_title: "" qiime2_ref_tax_title: "" sintax_ref_tax_title: "" + exclude_taxa: "" + min_frequency: "" + min_samples: "" + qiime2_filtertaxa: "" # file paths mqc_plot: "" @@ -71,6 +75,7 @@ params: sintax_taxonomy: "" pplace_taxonomy: "" 
qiime2_taxonomy: "" + filter_stats_tsv: "" picrust_pathways: "" --- @@ -864,7 +869,45 @@ cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [ppl # Header cat("# Downstream analysis with QIIME2\n") -#TODO: report filtering & qiime2 input files in qiime2/input/ +#TODO: qiime2 input files in qiime2/input/ +``` + +```{r, results='asis'} +flag_filter_stats_tsv <- isTRUE(params$filter_stats_tsv != "") +``` + +```{r, eval = params$flag_filter_stats_tsv, results='asis'} +cat("## ASV filtering\n", + "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA, for 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. ") +if ( params$exclude_taxa != "none" ) { + cat("ASVs were removed when the taxonomic string contanied any of", params$exclude_taxa) +} +if ( params$min_frequency != 1 ) { + cat(", had fewer than", params$min_frequency ,"total read counts over all sample") +} +if ( params$min_samples != 1 ) { + cat(", and that were present in fewer than", params$min_samples ,"samples") +} +cat(". 
") + +qiime2_filtertaxa <- unlist( strsplit( params$qiime2_filtertaxa, "," ) ) +qiime2_filtertaxa_orig <- as.numeric( qiime2_filtertaxa[1] ) +qiime2_filtertaxa_filt <- as.numeric( qiime2_filtertaxa[2] ) +qiime2_filtertaxa_rm <- qiime2_filtertaxa_orig-qiime2_filtertaxa_filt +qiime2_filtertaxa_rm_percent <- round( qiime2_filtertaxa_rm/qiime2_filtertaxa_orig*100 ,2) + +cat("Consequently,",qiime2_filtertaxa_orig,"ASVs were reduced by",qiime2_filtertaxa_rm,"(",qiime2_filtertaxa_rm_percent,"%) to",qiime2_filtertaxa_filt,".", + "The following table shows (read) counts for each sample before and after filtering:") + +# import stats tsv +filter_stats_tsv <- read.table(file = params$filter_stats_tsv, header = TRUE, sep = "\t") +colnames(filter_stats_tsv) <- gsub("_tax_filter","",colnames(filter_stats_tsv)) + +# Display table +datatable(filter_stats_tsv, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) ``` ```{r, eval = !params$flag_skip_abundance_tables, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index d39fd72b..5c35a871 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -57,6 +57,11 @@ option_list = list( make_option(c("--qiime2_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_skip_qiime"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--filter_stats_tsv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--qiime2_filtertaxa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--exclude_taxa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--min_frequency"), type="character", default=NULL, help="MultiQC plots", 
metavar="character"), + make_option(c("--min_samples"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_skip_barplot"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_abundance_tables"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_alpha_rarefaction"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), @@ -121,6 +126,11 @@ rmarkdown::render(opt$report, output_file = opt$output, qiime2_ref_tax_title = opt$qiime2_ref_tax_title, qiime2_taxonomy = opt$qiime2_taxonomy, flag_skip_qiime = opt$flag_skip_qiime, + filter_stats_tsv = opt$filter_stats_tsv, + qiime2_filtertaxa = opt$qiime2_filtertaxa, + exclude_taxa = opt$exclude_taxa, + min_frequency = opt$min_frequency, + min_samples = opt$min_samples, flag_skip_barplot = opt$flag_skip_barplot, flag_skip_abundance_tables = opt$flag_skip_abundance_tables, flag_skip_alpha_rarefaction = opt$flag_skip_alpha_rarefaction, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index bdca6230..fc25bae7 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -36,6 +36,8 @@ process SUMMARY_REPORT { path(pplace_tax) path(qiime2_tax) val(run_qiime2) + val(qiime2_filtertaxa) // , + path(filter_stats_tsv) path(barplot) val(abundance_tables) val(alpha_rarefaction) @@ -81,6 +83,7 @@ process SUMMARY_REPORT { def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax --qiime2_ref_tax_title '${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "" def qiime2 = run_qiime2 ? "" : "--flag_skip_qiime" + qiime2 += filter_stats_tsv ? 
"--filter_stats_tsv $filter_stats_tsv --qiime2_filtertaxa '$qiime2_filtertaxa' --exclude_taxa $params.exclude_taxa --min_frequency $params.min_frequency --min_samples $params.min_samples" : "" qiime2 += barplot ? "" : " --flag_skip_barplot" qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 95c4974f..6d04912a 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -705,6 +705,8 @@ workflow AMPLISEQ { !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], run_qiime2, + run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", + run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? FILTER_STATS.out.tsv : [], run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder : [], run_qiime2 && !params.skip_abundance_tables ? "done" : "", run_qiime2 && !params.skip_alpha_rarefaction ? 
"done" : "", From 3e750711dfcf3c10a6365163b9f9016ad6b809b4 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 30 Jun 2023 13:51:58 +0200 Subject: [PATCH 056/230] add info to downstream analysis section --- assets/report_template.Rmd | 13 +++++++++---- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 5 +++-- workflows/ampliseq.nf | 7 +++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index fd149098..7f77dd07 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -48,6 +48,7 @@ params: min_frequency: "" min_samples: "" qiime2_filtertaxa: "" + val_used_taxonomy: "" # file paths mqc_plot: "" @@ -815,7 +816,9 @@ cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax). ```{r, eval = params$flag_pplace_taxonomy, results='asis'} # Header -cat("## Phylogenetic Placement\n") +cat("## Phylogenetic Placement\n", + "Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations. The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons. It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. 
", + "Extraction of taxonomic classification wads performed with EPA-NG and GAPPA.") # Read file and prepare table asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") @@ -867,9 +870,9 @@ cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [ppl ```{r, eval = !params$flag_skip_qiime, results='asis'} # Header -cat("# Downstream analysis with QIIME2\n") - -#TODO: qiime2 input files in qiime2/input/ +cat("# Downstream analysis with QIIME2\n", + "Files that were input to QIIME2 can be found in folder [qiime2/input/](../qiime2/input/).", + "Results of taxonomic classification of",params$val_used_taxonomy,"was used in all following analysis, see in the above sections.") ``` ```{r, results='asis'} @@ -908,6 +911,8 @@ datatable(filter_stats_tsv, options = list( scrollX = TRUE, scrollY = "300px", paging = FALSE)) + +cat("\n\nTables with read count numbers and filtered abundance tables are in folder [qiime2/abundance_tables](../qiime2/abundance_tables).") ``` ```{r, eval = !params$flag_skip_abundance_tables, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index 5c35a871..4aa1c420 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -54,6 +54,7 @@ option_list = list( make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_pplace_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), + make_option(c("--val_used_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_skip_qiime"), 
action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), @@ -123,6 +124,7 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_pplace_taxonomy = opt$flag_pplace_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, + val_used_taxonomy = opt$val_used_taxonomy, qiime2_ref_tax_title = opt$qiime2_ref_tax_title, qiime2_taxonomy = opt$qiime2_taxonomy, flag_skip_qiime = opt$flag_skip_qiime, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index fc25bae7..fdf8fa17 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -36,6 +36,7 @@ process SUMMARY_REPORT { path(pplace_tax) path(qiime2_tax) val(run_qiime2) + val(val_used_taxonomy) val(qiime2_filtertaxa) // , path(filter_stats_tsv) path(barplot) @@ -82,8 +83,8 @@ process SUMMARY_REPORT { def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax --sintax_ref_tax_title '${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "" def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax --qiime2_ref_tax_title '${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "" - def qiime2 = run_qiime2 ? "" : "--flag_skip_qiime" - qiime2 += filter_stats_tsv ? "--filter_stats_tsv $filter_stats_tsv --qiime2_filtertaxa '$qiime2_filtertaxa' --exclude_taxa $params.exclude_taxa --min_frequency $params.min_frequency --min_samples $params.min_samples" : "" + def qiime2 = run_qiime2 ? "--val_used_taxonomy $val_used_taxonomy" : "--flag_skip_qiime" + qiime2 += filter_stats_tsv ? " --filter_stats_tsv $filter_stats_tsv --qiime2_filtertaxa '$qiime2_filtertaxa' --exclude_taxa $params.exclude_taxa --min_frequency $params.min_frequency --min_samples $params.min_samples" : "" qiime2 += barplot ? 
"" : " --flag_skip_barplot" qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 6d04912a..b0c1bf16 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -496,23 +496,29 @@ workflow AMPLISEQ { // Import taxonomic classification into QIIME2, if available if ( params.skip_taxonomy ) { log.info "Skip taxonomy classification" + val_used_taxonomy = "skipped" ch_tax = Channel.empty() tax_agglom_min = 1 tax_agglom_max = 2 } else if ( params.sintax_ref_taxonomy ) { log.info "Use SINTAX taxonomy classification" + val_used_taxonomy = "SINTAX" ch_tax = QIIME2_INTAX ( ch_sintax_tax ).qza } else if ( params.pplace_tree && params.pplace_taxonomy) { log.info "Use EPA-NG / GAPPA taxonomy classification" + val_used_taxonomy = "phylogenetic placement" ch_tax = QIIME2_INTAX ( ch_pplace_tax ).qza } else if ( params.dada_ref_taxonomy && !params.skip_dada_taxonomy ) { log.info "Use DADA2 taxonomy classification" + val_used_taxonomy = "DADA2" ch_tax = QIIME2_INTAX ( ch_dada2_tax ).qza } else if ( params.qiime_ref_taxonomy || params.classifier ) { log.info "Use QIIME2 taxonomy classification" + val_used_taxonomy = "QIIME2" ch_tax = QIIME2_TAXONOMY.out.qza } else { log.info "Use no taxonomy classification" + val_used_taxonomy = "none" ch_tax = Channel.empty() tax_agglom_min = 1 tax_agglom_max = 2 @@ -705,6 +711,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], run_qiime2, + run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? 
ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? FILTER_STATS.out.tsv : [], run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder : [], From a684309f5400489abe0066b1d0160a3177826b56 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 30 Jun 2023 14:24:03 +0200 Subject: [PATCH 057/230] add intro and outro text --- assets/report_template.Rmd | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 7f77dd07..9d2d4e92 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -91,6 +91,9 @@ library("purrr") knitr::opts_chunk$set(echo = FALSE) ``` +The bioinformatics analysis pipeline [nfcore/ampliseq](https://nf-co.re/ampliseq) is used for amplicon sequencing, +supporting denoising of any amplicon and supports a variety of taxonomic databases for taxonomic assignment of 16S, ITS, CO1 and 18S amplicons. + # Preprocessing ```{r, eval = !params$flag_skip_fastqc, results='asis'} @@ -900,7 +903,7 @@ qiime2_filtertaxa_rm <- qiime2_filtertaxa_orig-qiime2_filtertaxa_filt qiime2_filtertaxa_rm_percent <- round( qiime2_filtertaxa_rm/qiime2_filtertaxa_orig*100 ,2) cat("Consequently,",qiime2_filtertaxa_orig,"ASVs were reduced by",qiime2_filtertaxa_rm,"(",qiime2_filtertaxa_rm_percent,"%) to",qiime2_filtertaxa_filt,".", - "The following table shows (read) counts for each sample before and after filtering:") + "The following table shows read counts for each sample before and after filtering:") # import stats tsv filter_stats_tsv <- read.table(file = params$filter_stats_tsv, header = TRUE, sep = "\t") @@ -982,3 +985,13 @@ cat("## PICRUSt2\n", "Quantifications are not normalized yet, they can be normalized e.g. 
by the total sum per sample.", sep = "\n") ``` + +# Final notes + +This report (file 'summary_report.html') is located in folder [summary_report](.) of the original pipeline results folder. +In this file, all links to files and folders are relative, therefore hyperlinks will only work when the report is at its original place in the pipeline results folder. + +A comprehensive read count report throughout the pipeline can be found in the [base results folder](../) in file 'overall_summary.tsv'. +Technical information to the pipeline run are collected in folder [pipeline_info](../pipeline_info). + +Please cite the [pipeline publication](https://doi.org/10.3389/fmicb.2020.550420) and any software tools used by the pipeline (see [citations](https://nf-co.re/ampliseq#citations)) when you use any of the pipeline results in your study. From 8adef25eb08c30eb3b9d8de617eb7772f663ff2f Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 30 Jun 2023 15:43:31 +0200 Subject: [PATCH 058/230] fix tests --- assets/report_template.Rmd | 6 +++--- modules/local/summary_report.nf | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 9d2d4e92..8dec80ca 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -882,7 +882,7 @@ cat("# Downstream analysis with QIIME2\n", flag_filter_stats_tsv <- isTRUE(params$filter_stats_tsv != "") ``` -```{r, eval = params$flag_filter_stats_tsv, results='asis'} +```{r, eval = flag_filter_stats_tsv, results='asis'} cat("## ASV filtering\n", "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA, for 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. ") if ( params$exclude_taxa != "none" ) { @@ -897,8 +897,8 @@ if ( params$min_samples != 1 ) { cat(". 
") qiime2_filtertaxa <- unlist( strsplit( params$qiime2_filtertaxa, "," ) ) -qiime2_filtertaxa_orig <- as.numeric( qiime2_filtertaxa[1] ) -qiime2_filtertaxa_filt <- as.numeric( qiime2_filtertaxa[2] ) +qiime2_filtertaxa_orig <- as.numeric( qiime2_filtertaxa[1] ) -1 +qiime2_filtertaxa_filt <- as.numeric( qiime2_filtertaxa[2] ) -2 qiime2_filtertaxa_rm <- qiime2_filtertaxa_orig-qiime2_filtertaxa_filt qiime2_filtertaxa_rm_percent <- round( qiime2_filtertaxa_rm/qiime2_filtertaxa_orig*100 ,2) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index fdf8fa17..9f23e919 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -37,7 +37,7 @@ process SUMMARY_REPORT { path(qiime2_tax) val(run_qiime2) val(val_used_taxonomy) - val(qiime2_filtertaxa) // , + val(qiime2_filtertaxa) // , path(filter_stats_tsv) path(barplot) val(abundance_tables) @@ -83,7 +83,7 @@ process SUMMARY_REPORT { def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax --sintax_ref_tax_title '${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "" def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax --qiime2_ref_tax_title '${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "" - def qiime2 = run_qiime2 ? "--val_used_taxonomy $val_used_taxonomy" : "--flag_skip_qiime" + def qiime2 = run_qiime2 ? "--val_used_taxonomy '$val_used_taxonomy'" : "--flag_skip_qiime" qiime2 += filter_stats_tsv ? " --filter_stats_tsv $filter_stats_tsv --qiime2_filtertaxa '$qiime2_filtertaxa' --exclude_taxa $params.exclude_taxa --min_frequency $params.min_frequency --min_samples $params.min_samples" : "" qiime2 += barplot ? "" : " --flag_skip_barplot" qiime2 += abundance_tables ? 
"" : " --flag_skip_abundance_tables" From 3db7a34e782659dedfc67a9230cc275cdc7081d7 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Fri, 30 Jun 2023 16:12:52 +0000 Subject: [PATCH 059/230] Template update for nf-core/tools version 2.9 --- .github/CONTRIBUTING.md | 1 - .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- .github/workflows/awsfulltest.yml | 11 +- .github/workflows/awstest.yml | 10 +- .github/workflows/ci.yml | 2 +- .gitpod.yml | 5 + CHANGELOG.md | 2 +- CITATIONS.md | 6 + README.md | 6 +- assets/methods_description_template.yml | 12 +- assets/multiqc_config.yml | 4 +- assets/nf-core-ampliseq_logo_light.png | Bin 11218 -> 75142 bytes assets/slackreport.json | 2 +- conf/test_full.config | 2 - docs/usage.md | 6 +- lib/NfcoreSchema.groovy | 530 ------------------------ lib/NfcoreTemplate.groovy | 2 +- lib/WorkflowAmpliseq.groovy | 45 +- lib/WorkflowMain.groovy | 37 -- main.nf | 16 + nextflow.config | 54 ++- nextflow_schema.json | 36 +- workflows/ampliseq.nf | 25 +- 23 files changed, 177 insertions(+), 639 deletions(-) delete mode 100755 lib/NfcoreSchema.groovy diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c58b4779..219d4e29 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -116,4 +116,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 79bc7c1a..3bd77263 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. 
Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index d66d066a..b17d2263 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -22,13 +22,18 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/ampliseq/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/ampliseq/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index d42ecdfd..7a4f39de 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/ampliseq/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET 
}}/ampliseq/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 19f7987f..53759d75 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/CHANGELOG.md b/CHANGELOG.md index fed2891a..6a8be928 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.6.0dev - [date] +## v2.7.0dev - [date] Initial release of nf-core/ampliseq, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATIONS.md b/CITATIONS.md index 3840e85d..e8783763 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,7 +12,10 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. 
PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. ## Software packaging/containerisation tools @@ -31,5 +34,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/README.md b/README.md index a891d317..82483cdc 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/ampliseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -66,11 +66,11 @@ nextflow run nf-core/ampliseq \ > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). 
-For more details, please refer to the [usage documentation](https://nf-co.re/ampliseq/usage) and the [parameter documentation](https://nf-co.re/ampliseq/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/ampliseq/usage) and the [parameter documentation](https://nf-co.re/ampliseq/parameters). ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/ampliseq/results) tab on the nf-core website pipeline page. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/ampliseq/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/ampliseq/output). diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 4e4f33c5..625b2446 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/ampliseq Methods Description" section_href: "https://github.com/nf-core/ampliseq" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

-

Data was processed using nf-core/ampliseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

Data was processed using nf-core/ampliseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}
+

${tool_citations}

References

    -
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • -
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography}
Notes:
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 3452ef88..1502d618 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/assets/nf-core-ampliseq_logo_light.png b/assets/nf-core-ampliseq_logo_light.png index dea08d56d0f13fd1da6221ae938b0ea5cbc22ffa..58f01531e7b0bff0f6bf2649dae96ca78b2d2c5f 100644 GIT binary patch literal 75142 zcmeEt`9IX_`~RRQS?ZKSWhn*~p=96c5GGq9OZHNfecuPCQz(&w!1jG0qM))u18{N;szxKLnntC7*Z0~7*=;B1!jv^4p5Gb_^hQ29NgTYTSd@O|5 zS3HI44fR<@BwC_WweNAg^K`t?ay|Ua^`zuS;o*5X;p5j0nLR_3TdTw-*C$<<{Vk$; z9`%au>-b1%=CCl=x~!Jp!Br{RFpzjKp!3X+Tb;*QRKss@Kb){h^c+@seV?p-3zMBT zv9)Zlu({<`v3Pc z_~QTk@G~L)&kz6ShyTBGp!b^mFYH1%8g&}PE+NMRdy{Rgwkaa9QvrRQY2HJz)6`6H z9;J$!8p?T$p0J;N*Ye!J#ykH8M)iUCxVX5E!@pK|Rzc1t45Gxe-2E^GvsRWhY(8G+ zqQw!LH!;zIl^)J$8$X^IcCItbD!;xEnF(K*M&+X@JSfW~(%%?AjAD}I{FvT)!b;+< zT`3RVvHyDV#tr{F?pFSzX|tN{P8k1QHN6RI-9sVD@-lUEm%l0Eg`Uqb{CpIznVgoC zqUmmd=@Irb{U+;BnnF@S4JpEd=f8=bxA|}L4A?vsm9JMY?xEj%PSrz{(B9T6zCrD{ z5aNCa{cB^cli-wq*o{Dpv7Lu_ua|VKlQa68K&C3~Q72#9XybNMzba}b4=Acza~8q2n+%iDoFDn0jDk39X?^7A)!^mJ;E z5ekGVYdquWg)k>J@LX5^<&$Ub>jptvS20#izP!}h(}bdq;~{4o<`Z~-?Z6?eBvmOx zsE#!^me;!Al9p_BB9-oh+Bc@3zYqDCn3hx{MhJ+VI+>dJOaT*E;koA-_dUK}Uzf&# zH;{fF7_10)<{MQM8t=)+Bc#9Hzz?%a`@_R0){SISt$Kn@K8L}>h6mZ|Sq!BZKB@H20kftU}^PiE` z)c*Xdd@3S@t0+sw_uO~aLtzgUG2d;xQ1Q*1H#0qHdV%)wP1#8svyWz%C}A74L_x?B3pf9H&Y@2X=|G$}7iYO?E5Lr+QZ zunjfr@njOx!!AI9VRd9th^kl#?3g$t5Dxfn?H4g>K($Nt+fHaOY#hv@QlJIXl)td!4Cw33#odkl6Y zV>S|OhL=y33;S(CMLA9S@}2)++OhBFrXf0zRg_T_+T~HTPwd7xJV6cPBJX{fB~&hK zs$Fc?B(tfBkrDJu$X3Q1{1zTNRk(@T;z!+JtsYJ#VQFEI95Bp+1d)p+`Gk3TG-5Wg 
zkhB!>_0%li8!7wS)(5l@KDF!}dm%NoRf{a39g|I_D;7#><0*1`M%3kp01AB_Dq!Zg z8ht}kcgMfVhs)|`f(tl+ixNr3KYnoDKRVH}!H24qCWtT&%xd}zW+opB3MoDNJ0-8f zNvx7d#yy3T+j3B!o%L;!;b>EGDQXB~+h}0EX^k<%)ZBpGVwTz%Bc=Z{6LNVVmQ)Zs z#qHX&f?Rw4S8Pz4H6Vlw2CL`ph1rxV>T3%^&1h1dBkPo8>RjJw|7HE<#P4E!4_OE` zO$@0HI!7pPZx!b@3)8f7f(6Vl`(n8hAxh@*>=H@8QQ)g9oK9SqBFr%3t$}fQ3U0|& zMTUI5{BLzyt1e{`H?CqHGJTzP#T38;zV<;^=nNbG6N-_k!KrUQDx)Z|AC(bG|5a8Z zB*H@M#uON%NKm+sWqkHO`)aB@we3grs9;DMV?Q{%PqLj~`hASTUIF*q`ZO5WR)wVFI`G?Zxevi{$Td5LndKR;aC(U=|9wR~L8w;+zr-%IHsbY> zUgGTk{6DWrVb zYX7qj`>+ae$t5+}$|T_!B3=Erhn`P}k1ai*^PzUqmU{4eDXuat%oMLHRxej$e~5m@ z@ADVp?D3O)y6!#xyXd$s{yrf~zYM$Yrd~^{xM%^*VgG&MleV6Y&|SUNwG!INi~rl; z<-XXdqpn!99)UghSN}nCVm|NOx&~&TmiGceJ?{6R>laTmSZ>pxJbelcMsk4R0F=Ar(?q*%!}BhZw%+9K`8y{Yh!MT%%c;Bib&k(wxLRjmW=N{ro zoje;XgQ^~##P@&C)S#ViS*=Lu%Jg6vf7wA7B1zehn!53h9Ut=hiFVdZ2A1)BWO+Or zT}sR*gJqqhOx-8b1SCR0`&Ue?BhO8gDxoY*R=fY z+Cyn|_k)xr7Y`wB{C-T)JdQ-^IL_#4Kt|xti;{O2Uif`>)vlM+z~WAes&vp2#~e;> zaP#^zhn)Ghwj{nES?XIu)mFnEPiGi7&MHYgMRFdBqLYyRcM0|3NrSwRzt{zDC$Q16 z*lJ*$9KIG@s!K*lv(_p8gm-n5bjuuJKPNIbLluNw9-=Anc+g>>{ftA1)Liqyomg7G z0lZGlRAqUVOzOE5hF~nSdqkDH#ahTn%b<|fSG~?U$lf?xD}R^!j=>M6H8HyWF6y2} zPGPZ%iKNdTp7uW4JWgAQE8vm;X_WJc)Enn#$({*pabQ-s4krlc*`UTUP?m@IrR(4uk6XT&bDN%A5aA~}3fQZ}+Rd6c3 z*IAG-N{$P(j4Q>Srfr2tpV8=0h{!#~3-AoOv!u9tWom_0YBxR+7|^?x3!H1(U)HeMcJvM;GiZDK%TC8~?<`}ApK9*l&Oz?(AV;afU?!7R7^1E3 zn(zjAZ>L6+)k_BZ;z(Js8zvb4U#rVK@}KTN_B?4j^DOxi6XO26e;wx5>Meq@OeH16 zPKhP&D9lsS_dDnqJvA_TPayL?T-&Eo4MaN$Vsh~LOFAw$sP98vj^)e3erB(Ix)0Ed zcRcmT-^mAK97kIoOzJos^3BBIn=oowuyWRsVNp-Q8QI%4?47^vYmBj55kB(7-5G-Jw=*jed)*MV}zlKa?!7quxNI9Dqv5~0*qxF{ z-|ays&_rj1kTx$F^uK@^zBGGr$N8@D5U_4!fjHEh%d}?#HzMqS1VBYf&^KYut?s3z z#x(Dl-G0}fkFA#VYCT#)Cajcq(Xx9}P9Gs}$ynv!cB`zU=s>7GEmrr*<+Gsc;!_6q z1=Fl1&esa#1l?YLx5t#zFs9X%$7g7LW1T&4gw?plYc~G0M)WlGL4fi~%|d=l{ONR0 z(ExtJ#m(uPIko8AUgyCi5<6xC?H?P${GQ>p{S!2bzAysv+#gde=;uWi-SN!d&Z0cl z=Vxa<6L=w~xspnfYZmT}S`g$EU~=c)X2)i+nZgjfLi{{7BR9A9V@M?IiAzae66wR{ zbVBUFuw%J$iY49n2)JM4(tQT$^3x(BBAJp1iSJ3%-4{`4VM1nRNn{A0Wy;eaWAc95 
zmX5rTQxA~AmcS{swE)2-o_n~AHzPLsJI(%{&@RtXp}uWD?G!-#W|yZ}HlXQ(*l93tqTy}~zd~*$CAgPi|Hx9G?WY5}M z02i&|#Gzt|tMhtL2iunNy9`lKjcFtdl5U(c0=}qQSucG4Onn{mfpPuC~ zUODq^;@FC~c)^rubE~#vvhN#etKRV16JtlmZIYdM@X)Bpn0CtGAJ@B}v82Whya624 zAWNK=gJR5mxMhoFA9d`R9<}|+y@96bmehO5?J{6J#mA%^uw=C3g0&=Yhgqk{lD6Pl zA2MNCrS_F=zGQJRW^*O@TbhT;+S9Ov8I?CaYg*B%^XJm?+K0UD#yYZ6KNnk=2?@=p zc=mdfEVeY#XB$fMFMFYgxxJ-=GENxkH(mxUP$i=}qjnpYz~jsE$`XWx{Ko z{su~~zYEKQH!jQXa{LphLJz|!xE7Bz&XW0HhkW@%MrHfMT?G}tx!TNXzI;CFJ5KS| z+d?rqica4@b;u}fj(?1w;vxQs=2i$^nPv}O^2q1a?fY1*LTE(|m4YKGJh`lI0QgB5 zLd7Q`gSl>EmtO3M%k!8F{Q_tbt)Q?GgUEKEQ{K}&yDmX?P&-6cwO7Pf5_I02N$U;D z^>}L)h~66K!L}xBeQR1XE4$^_To%#xacxYw<_$IFVFHr~HRaRStq6wUxxh^9K{nwv zGSbBg62eHHrLdO9f=R$peChd;#blkTAnf=uz@z{+E z09mH;dkVd2@B;WHFHWdCk-9TsY`B4HF0mG@Y0w_n%lfxep=Py_`>pF8HAic zI5>Dzt5K|fzC3L9WK7<5F*_$RAK>TKRTAWIyYol#>f`FxkO*AF7vCO4Eh?p$q_x59cLmsMlbT+}V zaI|PtAk*V&lNx5bTV?I&R}u~D-glvDnrJQ!d9;*d={1AV_H|(ab9o^1DGx zEg*8wH=cWZ&jMWl(Bb3=VVJ2CsbSv&R{t)jDfS@mUP+~{)vZwNT@_+ChG}txxpgN5 zoEUkoKQHx6+acPT(tX;P1!#WopOG#Ay=mGdgRh0xa7Yzn`F)du8^WH4JELXyeXy9XZNETOysflQOlCGBF*;iJnGrL6%1H`;Ol5>#tPMvU^qdFg6f+ zJ15{3Uw%mDwl9BEHY@WzC}z+7&<^JkfyR=ThRTwkPyL*}H=xoj`;$p= zzvcr(!zV$+TpgsJOE5~&Iu_a!B5G-Szdsm3JB-9Fv?8G!dg;0Im|<{;?oNIT>Mw_u zc)4N9LGY&l#N!Pr@+CYtT`7<%?rS-11^B9A3X|D zz`k>awRwQ!@Zpjy&@Rq`BKE}8fF_hR1+je_VFF#Pw4WYkP`_+9>`NqEb*gHg1zKK# z9$UEbB;f-%d{2K8i4zlOMLs6c2Alex9lj=y7xD?ln8j|GV)T%Ht{_O8$oT_~^dpxb zh6WP}2HLBBFTy$k4vuWXZp^LOJN}+>so%B{$y?m^&t!i3t`;ZptDkukl%4!I;I-4amD{4_C|db zZO)L6QpS)3z?ueRT_Op~KDooYukNekjPxi;Afr7!vZ@W`8FH7KQEehTFy}6Xhdg}Bj%BxLhz^5<=~ zrJ&XZ1!n?b)vw=MrncjT`pUz!c7_Mm_2vn-!H_(%@uWNm`l$j4BYD3>1G>f&!KDEh zuXthGF+96Nj(Oc46AUNoKh0wc3yq*^&k*k3OQ%^>h~DYB_{L#K11?8(IF=tl4VlX` zMOG$&kXWFZlMd!&o2S^Ck@w$&+a4-RQxde8 zhGZVKLiQTS?|R%5$A%c8!MMTUp3#~rR4ufb%a_T=gv~&9CX$k42Q1}xh5@QxJ5-Se zO<11i9!(6?i7+79&@ktMc#3qHQhSn3jY# zn()HALZ!onAgu|0NiBT3VTe(OOFYa_MqYyO+Igr4F>MH!VT0Sdb_l2_5AA)BkRplz zY67NS#Pi%uH)8<~6fiX}J=utEmR9nJ$b(Slx}(J%bj-eu-&-8ZJ$G2ML6xQA 
zAn$*S1b*Nrux5H7vK9w{fGcQ-XFC?hb{WqE`jYR|FDtK<7QdrH5269ZQVSZR5JsC% zYD*y4oDl33NA7(pbp}7Lf=ANz3oMdIKMMhB_~RphsVuLXpoz@ncSX`BrMlA2&3=Le zr=R#GVf5O_Xw@XE`ka;gE+ojMDkPy4EYh2}2^PujSTtg^Dwjxl`x8^S*#Bo-a)~MA z>X3;%V(y9P{#itTa%OHjdaY7hm6%u0FA6rueZa!(z z55fR4_!W(|Y)7QOjkW(ASX(RZ05^mIM!wMa#KRYB6NL2nLt0$|L~%@$H13UkWcF=r z`R6Sb*U{lvTj&`WWK&2m$Hbo+Hj_uVHq@qrle~7EG{CIF^po4H9ib5MAw#`nF)#2a zskzw?mkZ`ZT3m&w({4j*Y3f&}v`ym3{rX>ST8FkF4wX+EYy#6Da?BGl^l2ksF*uF_ zSf~FIiseqVB)Xk7I-U)Z3xPLz)#r(2_XdOp+Q|V>M&R-JqC5!o-U^;CyNQJ96Fkol z0ui+IH8F;9L=Cclw!91!P9v0{6Ux$3o=Kw61;|qUDTx1^F2F78u$?LlqwQc#!YOyj z3wao0qG>yrwC#IMe%(Q5{p2e7gCJtkB>*DP;%-TMG&e^bSEfYxsr6E4u8>&@`vA)k zxdcFVEn&Lu2qsQM&ZGW+Xv1=NzHkVxy8(U~=QJ_fFaS@1l%flfx{Z7aNx5?ikptdu z{Iz(pIxZe5Lz~Z)10m7UbOc0FEs_(8Gq;xm5{Y)7VO{DbvU5p+_xE>uE!9gj!Iaau z%TFIXWBQcl8QS$m&d-|+{G1^WoC~bS1nb3WC$J$>;x_+XN(!O`AFjVa!rEXG5`K;b zLkucjdLoFq=2sw)uk#>uh1rhcpfy5-0i{s0rF|25=m!O-h2=Vit8$brH`j`EeQw`? zL6`I+b)0m}!FGYHzOt7qDQX zIS6n~695KoovaVSl!6c;GgU4mm$Y?s0f=D8&_)T~62QOo>)(U|a=<8| zmh<}3Vo5buv9oOvSK7;t4{f@qTbfzW%O{eaBbhLPRl$D5)gGw(des^iu6^*W01VD= zV`SCyCXV!F^g(CP^s5eD;YpQ(DVV+nE2t1WsC?LjMo#~>30v%zN7F=bEEDaTetXht zD1o#E_J1y^GsUSdbxb#c*pR9T1iLgE)cIhl2K;)5od|btFs`W=y+@_Ni2Go$G z@Q{h=CgX5+t#?(wO8mjy&(d?s1W;^(en=qu=JwRZH31Ya4A+#T-}62FOj(4Ize6K}@W6YZr^?Dem#2jOqCXeRmww! 
zGoXHbb(q>X%pi-d^xzQ?UExb;e0Y9E7+$IvUKF2wG*%JQ^{QuCsPZgsEN-9sivbU` z^o-vqspl3owq}(i0*$Rkr}*|_c^%3<0OR+;sp0(+>IjV)o+Gz$AOr8Yi18q}9&GBb zhCVk~4W$D)%R_z?rKpk>Y~a!^-}tp}xLZErW@WFlQsU52v7F)kHR6QLkLPa`e7PWu zP*($;n`-Gse6jdZF{fFHdOy&oao;`%FPORU1nYRZVCpQF<}Y*}i+P1BV@o7}St8x_r>2-9wNP;M8 zcD9UX^E6p$%+jaBD+&%Za`9O#c7)A0(g;|qKb}NcWL6&jTBlfN|LX0O_N>=8LS}~s zEG>-LxD6U{;Q6zLS7gq*oU)Xj)4UHIuOt8#v3%G9OgVIN1CN5DR`a*hn4WcMhgXDB zET3mhL~RFhA}g0OW>3rX=Z(1R8A>B*u+jHze?P<-rw@NK&kIl&y4o0 z%LA25?zFbbb0q!k(@9RF=!8@GnzM3FN?D7!<#~RA`YxsQ0HN@LgA74Kd!kPf;JS7( z{bOMTc9-*QcbLo2OA#@Kh`ezN@SyqA0S*o(*?$tUfu^W(7FFBZ2>=wKiV0x*H62-`5Fclu*L zA~Ipi-Mq2=6WV6m{YiUEZ;SypCJhiu0!L}LK>g?tkyI=$n*VCQQ_2pQKnKvZ`dcf( zW!^7Wh9_W1bPC5%$)`mLLn%YIqI6mGFsa$VK&*8n>!rELxi1ZUF(i)7X}Hj`zyj*c{HII61u=Y<{rl8{jrhqkAEU5q=%DQdXOIh0xDvYHV8Foh+13dBI$3Yd4~3b%RKPN&QF6obt$IcIBy*HauFFq|vp$<%f`KJ5a8XFyi<8}qXRuV}*ahZQ{g zB#I4Eenr^N1*2yg6?F<4vjkE^Y?n-RvKCWFXJJauev8uSfw0=yUMsh4+Z)tnp0TtN zhyM5PYvE0}LBHz<(y1Rt%#K}6GXFh~JA5SnU z(4kC|If7CaB`fZtoKX}kjSw>H4J{xGWQ8v&vsvc129b3({jj$U9dAK)8^_krX6J!# zIxW_rTP7Mp)wT=zd62oUF0=NxDXnf+`wUUv71&SpDi__ySdKB&|8%(&Ba<$!0N(do?Y0_U~$B}&=QlWP~%Hr~FH$qctY?fm)58_koMPp*h( zJn3j+J$KN@k#?RE6iF6U1l#d{Cx%pb1cTHP~un?rQDjRQ5zSi@)HkbH|YsJFE} z%IdEucy<51w_zb#xgMV1E)d6-W~&UlNK=dTyp9)j12D5bqpWdPHZl%RmduPR=4A;e0bB0cAG9A(?*V0)a!t%S*Pumi8vLLfTp)urZ-phYc`kn znQgB;!M50G<(_T&5zyFZTCoXVP2ukAo;;Y=wPf?8DSysHM5M?H_ zM?Wme+|<<6)Qt}@hB3?{hFEjUbOat=K2*|1U#4c`%Hy{-#+zE$7d#W!Jx0&BJ4!lA zfa!-QG4}*ZK9e$>O|?5TBlv}c?B5%;0m^F+?`B+!rxzE*;;)*`YcRhV4_Pc=nV4M|q$8`7S9o({=o;ipR}!KWvPa>3ogeEH1k6m9Ibd z*&c6fMz6k4v9uNlNMFG7E4_Rd&GH2dKT9!=t9!6PxVA|wDCi6ghLEN0zV&88OHD1q zXW-+DVY*u(O|nr_*!s|ws&Z<�ev`Q}H7y#R1zKkC5n?0_OP7^FqWWeXhX0t0pNK z(bt$TL*ehNPtM(;VA@5R9zN!e8~K<~cX3NnUF1p*`5e(DU1F8lRX-)8KbL`E|L`3V zNx2$Zf1S7Do%}yd%DH81m#>ET4sG1bNkca-B!p$@$27Ju`3?2uL@BKov2V<7mu!_y zZ{zyp_2QITSG-eP=P-{N#gu#(3@bdT4+KZJNda3|h8Nf=HS=!63yn&_8xd=3Jkhf$ z!}BGTsS9Rf-o-Z?Q?|cG3CC|q^rGJn>M0i8LCYqr+E3?cMnhr-$;c_-;y3nImk_jg 
z*SB>)9>F^Z*<}?lDtFvDC)3w(;J|^ymifdvBjSktDB*-0?<&&u_8~@@7`@G>U0<++ z9+SbA7tkuQpQRryewLjRBRYX|j#Qk}?Z|6*YO7K~og$D#s)y)BWmu8L?D||OjOHli z(rd40>4_~TSlT+@@R3Vwl4m533X}aO_w!RFZu2~QpnL7?*4I%LpD*2+wLVo|@%I8{ zzZ*2>_N_CqtE}T$qqCAa_KGgmtQr5qR1iS0X_i)@emeG`q0wmFbyr~nZu(wbqnm8n zm>_weO@nuHR=8~I#88`0`PS5U9d(wcUZTt7AX?2|`@=qRC83w>Mlt@JqGP!z*B~9k zLWkYhn<%5xrfan)FuTkCh{hk_05N^8n#jP+e{_`}<+~B3W?CiNuAua}a_MTdYyUEu zusJz*oM-`=N*{Piw?l43yLb=$GNYte%b+5I@-V7dC>B1^m zR*$`EP?Yr|V3rCL9eeM`ru`w7D!cmZMv3U8-`dIMVpnov@J7;{b@x9^3m-Z3Y{Z&* zD_zX0=I>)SdOkw+&z36W$kA!;9RD64IRcJ9N)qO^ytsAe+9S#M%>(p0L@&TU7Z<6d zXj3LQe0J3d7TseiYm0wOit-x`{PWm{J|RZs<&$+&Hgo2h z5yoyB+HQt44OJ{z%<^Nov&O3L_s`N7xT*-x6tM{ij1IE&RK^F;>C|9s3ZaVQ%s1ZD z&nS+C*X#c67*TD{>-$e&9F_U?(pP^n73=qY;t~6n@8+=ca8aLp%dr}3!iDJCk?<^K z&vypzO3_=}Gj~EnkD5>38d&H~S$*Q#8lks$jjwQi7#*)n;Y=>q4V;``tYFUD_J8e# zh|!nSX8$YmI;3~P|A88khWk?zH-)?If|Hk_xY3dxFKoZ2t zJhyn*p%TVmg-uCC^US3grB{BCe;gjJc~y-@ArHqhvcIIv>?>x{3Ka?IQMYkLr(_(> zW9Yhih|wXG9m5&4$o+&R?gWb^T_Edb8q`Plm^+Gd%I_1>MvGg_x>l(|hG zXL8v{RZZI(QAKaWHr5s{+1W7^G~V*hY!i97m?+bvfBkF?1U{OvO;CKD`v$kh#Mp6S zW}dnS&g=07uy2cfao?kBg`l52EM{x5^{qZ9WVy(?lQ9ObhGymV&M6W5@vZoDNTGn5;{NXx zX<|J~8H=}B&gYFdI$k|n(j)EUEB-F--tzpx?lX!kjav~2haKue-^}@3(<2`l9v*%V zpct`r=&rGCgdyq>V-|xIQ&eFazpBmQxvNAkeJ+~rNaF6(0Q}arT=aY7^=HiHH|9($ z2FqKi7a4zW5&2$7`1++}teA$yJok{Vzq)`Pmy%Nml3Kg-F zXgU?f+Q^T}S6DR=!9a6CFTM63I1qE;!8>bUFzl|a`*)PGkDYY|aNoPCe2S{MV#&TC z!F=~d-rdNg6D;BHXbe@$z9Ddm+VuDVjk-}hr>I}r58#I@|Hf&`?C6on@5rDQ;BtN* zCm#GK9DZNG)n!xr>vw+e68-Re^a17vyB)GrmOgb32YfBAX7Z}B^qsjdl3ZJRYm~<- zu>14DocgGES;E)15;iXQOAcTgE-RVS%WN{_ViKsrj|B?;TuuS3;|dS!u*jwlru ztBk1E6!us{JY>%V92A6y^0s)NzF5~my5ZE6)b0sJz-@?W8pFoHx$16HHPOny-p6#g{Jl;f&|&AJU;;%xQ`;X{=fW1tN4U72f4 zG2cMw-+5+3LoqX^{p5EUUI>9<26SbY{c>rF%o(YY8`tmLVq6s@K1cKBOl@2}*jRT~ zwnF^kOUr9N0z8a!ueni;qm=x6K}x5od!>a{9A3?Y6I!_mV$%j)A(Y*B&e?@v8S-a( zSs!W+gCwB|RuzEbEPOpaAT+ZfMs4{P_i7&;wmSDNBc#h04lydP z5hC|$bEW#=|eu-u>CWszC&qFp66I!fh(Y*Z8a;X4HJEb(E8rIV;uNI`YuH-0LG z_x|L@M;I=omg$aE(ovAcYk2X;oS)P(zTYR)WiNgO 
zyKe)d4l{1;mgU^sK2|@v0DmngV>`~z-{GLowF<(4%{)|B5!HIprtr|JB(XfNq)F41 zdBg7zqyK>m2|zW_rj-*ODz_K43Ai6K?;X2D^odN@Trxj!?`>nAs;1XPoBi~&g)}9R z%Mk9FZFTg7bZi1w?Ot=Hz}>6#t^$S6^%~71Rd%7%yXx;S_t zt$ev7PH)oT_RV1JM{E6CffG#%%Bw8`QG6>kQr&(jVIfv&iAif$%O5ydUwiap6W<&v z6Fcmpmhs~C*}t_NH&TIG85T<+5v{-jE2d1K8R0F3_wzj=JtlSsiU1_P;jIu^rVt_$ z12*~{@dWX^EGlooFiB*1lh^f3mtR~?6WXJ5B!8FTMy%2r1aV71x1-&JDdv*D$fk(E zVm%|}?A;~_a#xV!!8snvf{hP7d)bjzB}+edZ+|(zqRkJa54CYhAB$vW9i)=5Jb1Td zsKHz4h5CdIc?r6d&$A<`fhL|44`p0}NYs9xL{5hW#nr+3gyFT9ae7LB7N1huo;yjb z&wqUL-Jo$kkm45a9E#{1v?(hCYS$&-Bp%v6bD5a*gN`dT>3kVm>-w&YhaNy*!&?ij985sS&kCNa*JE8-5_j zl*)Ynf_EvK>~Nl0&OdOB-Lk>%-s?G}==9cy*Z4c0bLjG)or+@Iy6*0Mt>7%jftcqU z_udxaRbCWFgPc{vTfq-3ZDye=9>R0)Bi@CaU_mpj1{f~K9QZafW~F|U&y<^Q)&CHq zFo4D-zr(JPUg2U$d;*Q;!ZuHD4D6}d<7)|w^W(gcEkIi(h^Cp!=CPKa!I7uay&pJ8vY}rHdBkJ~S=vi+eT$}~wv;e%L7}&a*03xDe z641-lqNOI{=)U4uT~qf@4QM{Q=j=M%-eZ{#(dJS=iu^w{4uPI2(A91YbOkq5dnMu^ z15m)6Dz4IgZaQj_0FM0W-{F6{QB$+Ehc;Vmu4mC%2G{h-{o+HBkP?7|AROl^&*XlN zc{98Ncz*GL$dj#;uK8Yn9=-%52mw7idF*<#&aI$(UQuEe&OGOBRZcJaVH|)#IH90w zbu(d01*q~5_r>ReULX$yb~x$fg?8DnBhL)Ur!y5BcXn#3)B#SIPF@jTO#X+%}kW$rp4 z3HUieI@rAoBzq4wsev^5inv}1Sydf6MvtALXt@YrrxxtnRhJqC@h{PQq)%?!|2&PT zpP5>5)3pHS*KMqIO&W(WVY_EfVp{Cxd02)`XoJK9h!XVb@0(q4F2# zJ}mNy&+|Bnmlqv1P4hM{I*^EWBi?`d-6?cN$lB^``8zBA%$r;9tA!NF3I$fVIxVhD(!OdjKfxSyz0@J8@s*BK_WI$@|uGw$m!mVLT+5xsx z{KGk7{QTE}Jx58gK}JV44rH?!|6Sc8AJ)Wgapd0HBQ)FW>n>WJ;vmc9Ex!(h$pqqc z8QU$FAE6>prrggQ0J;1iHDkRVI|CX7z+Xi`kvVmn`a8x4e!nt|yE*#)L1tRH72FwP zy}zc8@yNOTAu%*!f}4v0+e|0--z5ooD6v-%V({(K1kI(3Hm*lpE4|pVS;4rleR&L?aN7Kv{&uC*`91Y|dCsl=N?)>V1R&soy^VyDmb4<38D)!4InyyH&6 z0f16w;%OKKXPivp?+|A&o!mWFCBUZO|8%zX^pC0=yn*wtvWC$=-ao&Z+91td6AYAd z!l-jeHRp2*41eHtPKGkGu>*&tXe0PnR3d5W%~sw)$Ql@8vJhADJi-kl%mUo*d9lT8 zdO|NQ3VcSJDtZcmSOat* zd%gvZvK$-FccrVC9p44n&2AF*>TduE);a!3ZvJ$2;kOrUzvKx9m&SqQ!UN^W&SlX+ z_Hcl^&Kr0c z2vJj0bsAlsEv3mQa4tNe+GnM*KG3D{Q6u-#U4aBKIj{YuYvU4kcx;N)(KzJ_={MjAFuLS?R3PHnijg*CMuZ5>*2TkknWmFH2nAKDBSVjNthgj 
z441SWzajgc%#wb9c|*XjDC@+^q1o~Vlsx-%@yuDGtMxmaxH4MIRjAOva6YW< zFzABA!sNW}3mFRe+N-*g+!j?W@*&}0ItKAZ)+U!^?=F6e$Ue;R>Y}Z+=M``$sRg*X z9$@rO*o*(H{6N!|M=q5ABL$mP{Yh>C$9-$4KFZ$y)1!4et}IvZ0*zuhK_@)7;<(0tx5Cm_Jqrzhea(H>C6xM|;cjg@1w zuhx7IF^WgVevuFJ96L?gU2apvTk)CZr*?qQ0T>mo@y@AFigJ|DC6+=ZF1>);wJ#Cu zDa?V5@}Slt@1I~fKZ#UZR_hF6Yx$E1Q;krj-qL{*Dcz1rXXlpGW8$14M)cyxf&+86 zb*Tj>$~LRK_QxFY6Hb~b5oSkV5zY@{Jq_yE{tzZJQm%6JAS#yb&kA8{GXB0jbBM@+ zZ-sfD+rX?hr|H;u2ge6bu>%Jfg6}b_?6b%wEAyYV2h7wQtU*A5!NroL-j;1`xMFXl zSIF@ao{GJz(ymN%m&LQ_-=mTq*Y&xolD`)q0IyOuhKmz0DmK-x?U?ez%3%;&B#Y{S zcKR?(;6!&T+oz`g-5p!NRnzvJ6bzS72tE*=SBRT1B(eV_cWQj_)tsbu+pee*w$Jyt zRxwb!*;1R4{axORv&G?Db8yEHS>c3Nrx=?IqPE^|29fmMJMR9n$Ws#wzY1@%hl{Me zuGwB}y&sGyjixIdegma38z|1h&!9G$bc@^0?E2B9rCdj+sHEFr^(c06LKYQpZMio= z76r-X?~#%*%On(P#i*>Itgrc}#_nA)Z+(Sb|M3cE_KU1Bq~yw?3QE%!Ve8I z9KS)gws75Rc>?g|TG-=@N6W~{#?UmcP!q$slAzUy+*sozSkNX+A83(}7TO4(!uk=9 z6Va5j?R6NedEbwrGJ0r_1||=l28w=M_x-k9VG9n6&^?A#^Z4V4!Jvb%UYl;`opV4| z;Z1V^!i5d;YOIR%0~g^wrmm@n+sVsiG`f6x8kvy1M}m&KHhD$QV>bF&@P?OfaBbW* zxC}sWl=Du-BRX~mTduC%3r-Ub)*q5Be2=qg>HmW=_D4LO-pQbvta6x_UG5C>KBJ-hc}&vz zZ?nwzsH)wou7?;C7=js7Y?7NI*=tx=u?=#zFkCg+SJMYG01Dn zo%MX{qLuA=X@pPb$z?@^;@3Ope7MJ1t2@9nbhOCgCt?bRQ_wPD-e}3QosK=x7I`@6u*Y&)f*YmpW*O8rQDj_T- z@}h93a%r@n4-iJLCjaHc3#jMD1SXhc+xbu3*;h{e`x*=6qom#zvWJ(#VRL)Mwh5FD zA0d`5DcpW``T@6y6l!V5ZR^l;J}ey_*!gm4(E^kZCR_v6K-n{-9Et|1+Lt*&ziqBQ$XXl>)uE;ekq^JE{zl2xhx>V^#t*KS+K zP0(&@ExRQ?$zXr$n%Dj#=U@Uz?nRyL=HXx`y4PR$SGem;yYr-~-?)EOog~+FoJ9S! 
z^}+KTC^n_Om%rQps2kVDz7Uj}>*sq300^hGGECx5S4OgZFRLSaA!}pE*q3yI3#(9Rwg zftY|o_2f243lz7s_IJkF&Y(}!ocZ|lN`{4U@K+-xfF@Axau+YY$CebSMlT85x3iTz6X+C|GlUiRiaRrN50`ZGJoy6g(1VHJP#d@Y%C0_2v zeYdcGU4|6zDE%cm!D{w4ai~PwHdO55>o4ybp>NxXRH^@{QnUNOWCB8!qO7Z$VqlOW zNasf1dlf(7u?<}0-|N+PPrsxK%R}dMt#wXIJ?7yJFwIe&*6ct5cq>Lx?JcV_@!1{5 zxQbJ)?BL5ZN@}2fTBX#POz(p`#V@-&1#e4weCz*<|E{ISg{KUPtp!_k}9@K1@mB7?>dG`_Z5$0R*ozIiaia!mt8GUhq z$~EQA9U*yf>BGuLPvX+Nw}Pz%q-T)V;^sF5ss~VD zy(CckI%aWcUnxOK?KOdRL_cF%NM6DF>OnbFKnx7&sH1Oa-U2g%&U+c!W{%+fc|@ZG zC4(%NFXpT@8&G^Sczd)3|3bNxP89@WTy0DehHRe*kQdMvQ_?#%_3v1zbOlB&+#4n^Bg7TZuyFk@ec%HdtcvOyuuyy_98 z1PLHr`$^>|ztey~!)%SAfT}ZiL3!FB2_vRVRpq1)N5sK|07RG#oIm)D_~ze2iXy3G=N#aGe$H}bppmCMKC15urD zBYDNQzvwY8e425y&2uCm)}6k=6p`>XSWXF~5a^BTO{bq#+6H+A{qeP@6X&}5nAUNN zu#wG1-AjyIyfBOrU-5N3DVgPM z3?=KCa-{Ojnx35U%-EKTxru8&E)k9df36s%fJ!BD+8tlXH;z1b(E6P8j_&lu1UG#3 ziZ8MVA<1mE}kilZE7d-S>a7_8p1orxsQgIJ+HwbBgyuar`a415jpG?foKE=+Qi zH>gOEyM)rngbbfAs~q2F`i1cmdLq)-MqBZ%tTP;?n==}492R#!+*R%jtSj!lOF9w2 zc4kh5HvcqN0Stt3%=2$3O1;sIOWl7K7v-z*1_DR`k4D~9+SBRYjmHZK)JkY*{l&gF zghnKz|6Y#^4qHzZl5Zzv@i{V&%lH{rgsg{nRRMju4Jq}g9vostXa33?lm!U5zCHOo z&cJS+b>H$hWH@>g>YV=g7?GF@ogKeFu0s`Zt~pibL;h%{eQl?}S8J#7HJix_NC^gz zh6GiYtN(!a`*wesFswSDd9&X1Gru=7&HAXRgqd>P$-TWrd_{zh>c>jmOHMD@DY0cY z)O0(8iAw+`u6?|trmC#XT)~0 zqwlp9+cAU$BJC2qb>>T1FQflL6m)rc9u{Mli6NR{^ap(cWgKTpfFc=!WSsg2v~0L8 zi^j_z1#;p=lss3d2tl(sOU;h=K|{vWk=Iycyv^Bs8&VrTM_;t*QGVc2#r)#}RwssE zi!PocnX4lDe;U56iSUWna@tQaj<$co+iO2N=*daUEbNQX=wYq4ga)f>ETQ1O10w} z8$$isCm3D;Kx~$^!0e{l=ZMk*FmFOi^}rucr?(R@7PLJvx@5!maM};SWbp2*(G{UC zxGvTTSP%>q%k~L)+uldo*MzpAy3^^vVl|1Zi~eh``Z_$W1~2#!7afz|c9p3!wdVwr z0HncX!lya*7wIA4Y0j!j#hZ9`wQu)ZQ8BpmH|Raw{9>unZ`((JOkwc;xrNo(Y^r)v z5EMJob?M@XiSsYrw;ZMW8@Lt3JjFhwmDzcIi2bSl;P4WM(i;0@%aEfe72l|3l*g3t zXaWcGr22~jgPPJ1yVEw%Nik-GWC}egHFHN{c5)tBPc^j*)935%%%7D(Jpu1M87GB` z&I$uYmhLO;gA6yCiOeHf^O*7o#%OK! 
z&qg`>1%9l^TZA1Ee2OBqU7ZSj!5J_01=AJy>agDL+(OK9-}Qd zDy*aLP4MgZ-Rz3YweCfbCSeql3lES(5cYCWckWFWzhGVoqYwS~BK~bQqs!eW5CM8(&Zj zxg=~lFlwE+$wJi8MzmJb=NYb@P4jInnsIGy<4OJ2*xusTj*}|em|{l)$zXzM%O3BA zZ%w^~0q(8Hy0g1X8!kBKPwI(0zIdSh5T#3Y@pGOYS$ed!9@)kB6}eKyI2NO?NGUo7 z!WtM#kV?j@{c8b-;aIZc?g>7~@PhOlPO5q783-N(xeNAs!OdcE;tu}e=tLDg-UBk{ zI5@Qg(P}d12!m$+8oiyKcmk=tJ2>)v_lPLHwby+gCc03JQ;WM-dF*e*x0zrQ6S{Ze zo9p8-bi!*mfVdfN_=c3IAG%+IwC|3idF|u)M%Tux{a75CME{NOZTx&`<7+!`Ea>j2!4}ZP zlt%a*35=!pk0h@>r?=2<*^r{@8OsMv=?PcwSEyA1gy`*fIf>DBB*V{-iX9 zPg!-H-RnV30eQQ97F^viW#E}A)xyx0F7ELxiybA;iq$`UXD+sF>kZW6FYOnG_ zfWim=M^6?Xp_ca8Q)x`&+m&l?e|VP7b~P}*5QtMhss3|lhRPsV_uX5-mG&q<_ak5V zOzV=Jy~O0GH@#s77@x`2m9A1i`S4gY<;dM;Vd4vrsa{DsCC;RF7nXUl+qpUTkb)*7 zKTdq-Qt(#6!uV-!jLr{d62?4(m8O|+E4B#p3qudh6;#Z6G*`>rz2C<+jyK<5^b@NY ztzr1ZzUcyx?Bly>%HWB*Z806YB~q2&HZ9t2Nf#ipwV~trE!Uyw>ZmUa>$BUWI#Mz- z`h^t*u}-8Y!iY(CZ;uPk|ZX(5ZB^t`IQfO-e)uXQ+0C|ztXd8hYu=Z z{bXBWYX|#Z#$E`Z;`a)tSqM!Z-aMoUdxLu!fZuQv}SUI!Pyc%^@K!ES@c~@-~fT&+GK3MR#{`ZMxJe za0)Iq6gxFz+gB9M+au=-MMfLA-)y+lTTM5xv+Pb_+pW8tIja1(7X8F?Rl8CBk8}?v z!^+z$$zE`o+3LuM$v;aoY}R)7l8(fK*Wql_sLA9+;mP zGgs;m|9DZLqWXh9Xtpx(;Z$xE24y~}WmeH%6-5{16sZ|x>M2Igwl?%lrZz0k;69Gd zgr1_kl+wuPHh!e^(oILs{h?AvpGME6Crkyyk z?O7B0&V4b;FxRE3a_M(lhFBP#@RtB1MVA-1#r=$okm)#NX=8I^iBR(n&uj zIhw_cxr9?@#db`v?h#shxK8?lC#~9*Lj1@%p+D1rN2Pji-+#hAhivOqtI4_k(@+QK zRw>iV#zU7}Sab~WQZc2f?G`>IfGiupBzSlBK0cvwDyu|3gKUfGE#k^Amr4!)5#VuR}%HzxIn)&=tSj*{!GC77J9w%G1?x9}J`2UhRs3 z0{zJ|?BbM9JAMP|rF(vMJ$|ezguidRfa>$S3D$1aG^$fYHGOp;%#*G8PT9Gj>5!fJ zD3`@8ok*3LOO{dQ$jNxzOTp36l>D{iClB{p{G0CApGahSTFE~#j$sfU>^Br{uZ$_qsv*vtZZJxC+_{ zsS34kSPtmFKEyNJ6b5k)N#^CL4*_QO(lcl>HwNLUjTR2!qXh{%THEjLc z^?^I+M5_8}#rZEoeLL}Q$xL#Kx=_m`F2mu+u%@sds72m;mknKDg>nk@o6LpH39nUHP!sCv1Tu_@k z%dD)njLcUtIgNdvve}Tt~%S~&z2ldUoj2ACMql5qgn#V{O zKXdZ_lYJ4mzhZhrxX-;zy+3AGw4s@o{8bshtC*ESA$&x5zyG5vDsbj_?$-Ldd}hN3 zCO!oj+nl~*uX4jTfoMvOBRT^1Ahen@@2a=C>SU1fD0{KF*%YyLul(?Dxq!AYikI5A 
zQ!2rLJC>W)p0BouFKcF<#`0_PeBn@d0&gDwVjA08xW9<><3lzvE4PWqDg|_<{TkZ2+u8gD!dVu7akbNQ+2itVA%5pH;ocR5OtTz5bYBo# zRuEoLTbZS?ch?$Wr=Xn6Ubka3tJLqyp|dX)p8BHfd`16My1}L`WDgPJ-}tEpkp`e~ z2hdTtq~OQ_m9*A!&#H;@@RA_YaC+Bxp4<5K;m3$4;7?zv(pS0^m#<=D_&JxLl1JmE z5YapS=RFUH@u(D!M0ZaQ(dV=UPAu=M zS+a5Wmt}}dl>RAwC+X>iR54RfNn7YbjZb1KFK?V^rwxcV5%UCm;qi|lcQHV5`eIIdyWcuEX|NxMzk5b@IgYakiJr5bGBPu%dt zm6r}GPa1#|BDe&k*mvZosws42DrK! zM*BJzH!Z3klBOQL+SFK8C3jo%LECDTyT8hw$LhvNSfo(|>n;r$yMp9cuiNAwWY{aP zg1zOJtJtOS@zcUfn|y-#W@c`~T8Dl=hf!06=s+#a2VA-jahL30C)zbq$1D+p98~8$ zOFIQ=q9g{0|L!=v{0NRqqjWE@@d-uOsa=#%Q?(zB#`bLByKESn@fVVxhAPQ-{R^9N zTkpF`spJBg`E~qFg>GelrqYop4+ZI{O{d%^5mB}C-x>X9MNp_W=6Tb0uj7BVv+mKP zT(PNV5UgO>Gm_~^!*QH@yo;v zYfIyaWv?o8cuUW5a(H+d=bq))%*NqlEF!f2u)&#Zs`L_?Jc9#C_^RU7ZIz=H#}e)9 zAh|`6Q7NE$QQPdI1$5R4K0b|0A|Le0I$nMg+Xc^}Ym!noE!UMhVD)lV>sbq3C2t?0 z7F+i1F0mPUJbJKct}?VL9EfON&Yrm0YZe$X`qa%|#XN?Jp)wbTTO)5!n6Cxw^kjd# z95jO&3!cPYv?och%QqXD&!(Dxu(`S>V7zp(#xVQ?&e+VsUy)gRlMn<*oopnn=N-^H zdXV3JceP;snrVB1a)Qt?sUY{E#Z%YMN?YZ4zryE(T@xB|abb|$d>5LY#izmucSwlf zmf=C{!Z;?5PlfkSD%)O}>1Vz0`SX1J-h;8baggmI1D zq`*{VlbB})JHOqW#`Xs?;6T^Dv7UZ;qs|Vm1J8;b6t;l}<#eAQ3mJw2@&w!}xu^-l zfdnHa|6NR=o@K^&+ezhM`U7NO?A>N3_U+H}lPOISlUs33QkYdTe?D~v7LHWv z@=%qjy%giJ+V^Vx=2GBfuvQ&9)(n|*Er;oY;h_}~YNQ!xj_UhH_+h%!$WElU90_nx zp6?^|HgWnjHyd0$<7XMaUGvLfkdeM}`;Jre_ z@RwC~HT%CYEP|^IEq(U1eP3F%FsAWXx;Oi6G*=s2#Okfg;v2M8krrMe1z{fk!2NIX zrGLM=m!-UQ-kT8$vd6(h_+npscuAb;-6tp?Z|*P9Z3z!m=GZ&T^5F@O2i&LiZ6v@C z?LqHk+|M)0!#|On;lp%k<*oYbaoI)9S)!^9O0DKzqV?Jl6>1}N3F_0sr=3?{r%OUU9P-p z(lgc*X?xv^CS5WB@I`Z)+Acqlb?N?LG;>?ls>7bWzMOBC=$Lo_)#a)~{xAR^(5SU^UdBP%kEhDthlQ&|rJ$UP)WyN|L zhBc?|7@4Nz%?^c^jyVZaEI1v#Y12T6P*LT1=uL{fU#7LJ_fJ)|bKx)w(P8b5AUOc`~cnUA*?OAp5iI=;!P&v|g~g3Vf(dNKn@=jdpn%yZ@47a9djS?dEsJp~c;$T?w~}V8bCa=8ww>T@D-g zm;8zoo`&^b#)qU-a%cSSnD?Gu2%Q1!Xijrhng6O7CjSk|c`sbX-JO-oTHjZZ_4Iif zq%qv+sJ8EMo84ED^OXwMaA#_kSq>doD2w~7X&dYeLn9RL*DHMHKr46D?YT|hFo{9GSbOCU$c_3fl#;h6Wu{k)LaQ(;qusA>QMOvLn zKhdRc*#?wz;l?6cV)nviBFOV@`@FRV-K!pX>bO-!suumoC;q|9pdrM+U3N|-r#1Mv 
zxjN9Wn2r02k3v+&!nl~=a!sinq502tOKDHuMsgZSNyWWv5dl5Hi z6{pspRvk(Hqv|!ub*F>fCkNUY3+h+g%*;2m#PZn;#|4&~#U}H(p-g8mHbzbVu*K%} zCDm8N*$lvppuzf~2y{Ma#2F3>Kei z<}Yg!u9u4MG+}VpB5f|HS{RS0NsT7zMv-a8-=8REJwqGzmQSIcvG%rf`oXhyZlx19 zQ_s+Ld9bnUO^jN4KENvf8qj_U3oXG%;-k{9_lHljgQ06jD`=;rHdBt5En``I0q!)P zbxHgGJx2+klL=IKN~mxduQxF1Dbrky6GeSqw2Z_* z_aM~>A3V7cz1$mIJ~%pQ$ye9F$n9~op`Lc`+a_F=y4|>vIaqNDq@=tGTF<%lLKzd@ z`}oo#@oW3vk1aMzk`+{C!+4p@`&mj9{QeJ}BY0t{CK8q)5Pg^~p1<{hj3G`<852Pl zep*mk{YT&~d$Z7vBfHY1e=vXJh%j$fcTza-=3lH+so$$y*wUPvzqz=8>?cFs z<*U2QLFbF3a;}KIEcqJi;daXABYrZU^q=QS{KE&R`C&eN$q$>F?7_9?GMT7k z-V>?Cb>OX6EbTV=sGJ}?qSs>5unV(Ry-z-Xb?#%o^J-_wDPcW-Prp3iCE1#EE~ll+ zH5_}C<50trknp<#wUCyr56<)Tz>PdJw#OsZqEh!wP}I34Q2UwK&Nv4(6>fxSz3Sn;E80Tt;Hm>z|-y9W`7JoXh5Si9Q<>3-Fj0SGl-0GQq6&CLhNvxW- z=ih95pjG-+B@Ry=s38Spyie05ONXv@FOiwf^vu^QE62I*B|f(iXlhT-yj0zfmoj

)bNtXB<>| z?zw$VG?;}cA_WMLuWxkpU`bqq^-gI`l!vzyJIgmqm5DEFjm;@^zl*oW_s|8wm8e*b zz0XFbT9w}8+|d^`xK_6-vkAYgt=Keh)4pg{f8qatTnp1$c}kL8Q8Mn_uNQo(tIlKi zpX6ZQc^`-|an(4vp*vd)^SNh=Ro#iKRpvBh@*kGgjw6S?q%KHqoeH6(_1wIA`lV^z zAiRs`A3r0$<3C?@`aE7#*py0h!ZV&RT$9)V_a4o83@+F_%Eo_IXpu`p#0RmnkYKV6>PRTk%i$*vH0e2KA$-EIE^&JXaojXAE*53ZKr9x)`Qum z7UB9BUT@5(waVq@friz=*QwcTSIWnOG4BIs|6G-zA;m{oOAc}4!>le3X(;(rUNgef z(7*5!tt5aZn8P0!173!kFHC$!crh8;jTxMQSIE;}csC5F6Vx;H$&(nH3E%(&HAh^MAf}e0nfSMQPOniL_ z7j57+Bi!(wmiNfn2t9a|2C1x>?Ls7;Mf~#%uyxQ4XbR0iiZG~93)7HJPQ|COV0;>D z#;*;}%i>vM=bScHgBHF=!NCGns4A2;tr8_sKh_4a@ zt{B5ZWXgYDXOdJtuC%DBe?Lald9&;{9%iclNek+#CCvfe_-`5NJW@!FZA`&&O&=p9 zUwlVLYHm&ldOFGYwv^64tn!6!H32EqrT>2?b9bz=kKq{R5PdaZBW0#`LK1sQ18{uJjq4Q*}wb*uTa%(>{4%;VK01*KSq zh^qcE(^@tu>pk>REghc5E4ZPCWk%EaO%C z&%%0tbPv5YmqdT&R)}mL3i4XV6jvmR@TXK!7qX{ZJj;Gln!(~06Vc5%7Z>XGw*|CW z{3(&T7JDu_+<_&!Qbi0h)Zwm?Xj;_}Cbifn__LJbIWH-7#rR}P@spEbTfxO^XYW%M zhJEnJEAHE}H`p5>4E?|@|MY1)YOBU;fR@a2X-nTo)!{n3Xe8yyJAvAW=7UAr+^*hFU0;)||N9fTIy zB@~>=9fZueR+b%uo2$%=%7YAE@|9h4K3Gnr3xsLX&S#8Hmt95P4}F2SFI?k!cZE44 z^2&Ay?B%9a<(R{>NER!X`!cultn!S|gQPK!EeGM-a%y_zD!WSZ*gKbs4pw(8pY<-^ zZBJZw0{4iaQ9^ zT8kD}ql$!cJZi)g!$|5ll7vYeP!8VLd+Mk=2qkg8GX(MjA-$f&*W^R5TcrikeH_3g z2RzjTDrfB$SYPI)M3L--)_uH^7i!obxP{DPi zM5t48>!<|&hzBc#kyj=3dbup07F$XBsm!&;-|?ih7;FeG61KWhHgd-0#CxaI2<~64 zohOXU9U8pb+TZb2+zY+0l&eo_^T46u{q~Ue|CxIAMORWHakreaG}#%Q%Wu`*Og7GV zU(<`Cn@pWKnelXBd)xB7O*ED&nM^4DsVG+&`L>C}E7;)|eoNuO5us;xlLaK?UPnWL z9oIsOax`n6NWdBgeD0uZkVvFNYZ%?+(*c2XdpL?3?WayfRx`iGtCGnq$3sx;Vx(au zeMO66%Z|@fLcKSiZ}rdp!ka9fSR9_AmJ&!TPG)LeAcVXh*qv(ZH>Fx_p?Z7S7nWz) z)ey*k3!|#s(e?>@K9M-NqOo)0su5>}F+r^NmaMFtnvw_?(x_3SS5a+IXoVT<|7f5n z-$buLmMlGF3C@o%cq8VqPK?AJsprrN^WyKE4no3s8pPF}Mx72q;$0I|xYfakYG_Gc z357U>Rwm+~cQ?0o5ZVLAvyHORs^qFRX=&JXjNyp<-C>)ib3q~29*v;gHnL2YMhrPvbt=vSuYW4(cr@f z8=UnNlqNf&edfv)#HSxS=HRS5$s<37`H)w=WnJZkdw)=f6Q~4HzGpHu=cCi6ALdP1 zOCr9WAv56gk*@9&ED&R5pq8^O508?s7~M)Fejy@&lnCqs11Ju?5*TNoMVw8rVifFj zD0Up1el31t94lNCfFJZE_M$Bg$??f}Y%#sOy>j30VgauF7cy3Jc`~NLc@mm 
zb8?LBF*sBh>XCT{wRV0tuIBgEOClz^!hqnpS-}56WzSQ*Z%VqH3wb{?>5ydo4tnPU zxyUu-egF3R#hbM+cj|mFzLvWi^Qho&TOYdh=><&`I1208d#|_`Ht* zfRdAjL*2={gxY5jye5M9Fzx%{!{{ykj`IBreyhrM>4S#a(B$UT4niMF_`CmYdt<}! zv8TF&?0Y&h^K-)qPt6Bqvdv`30^U!{lAW*_lN~5#lp;HEsikw`{me=8=mP$JDi?Wt zpa#P;VlYn}B(4JBW&+~lL7B{A@a#9uw?wkCvgxV=oB4M7kt}3Vvit@|LV5W!K?I|L z;3>H|#C-&2vSf0SPNeU_A;)l4Y=bTzbFMEopMuqayJ>Lz%MeuS)id4_(^6#Vsx^#o zqJb}O-d?j;t$TRbuU`6g@^K<|lER|I)?xgC5t-FXN4tI4sFc_8?ck z_s6pNjh^u1IPD}Zwz6z0QHJgOnmH*Tb6H$7o)*DF6c6r@K!6SodT)WI{mhGGYJ}Iv z!G7g_coQcvliHBmNaKOzCs7eL*ZUIhBH6^Vh1?Ut9Hgq~`^Uy{HQT9hx&FUXSiT-x%ApC;r_aezH z5*`hvJZYm4$ztvx)wS-`9#1_?{hdO*b6x)e;_Sl70nEZD-K&s5e7azHJS6&nIr0Jy z?hX=4@T`nG|L}!jp#>f|MKlg4`HoU`vDo%oI}t>JFDa7b*?2-Xjg7j)tL_sR)!fA4 z23JD&1o4a40%LCb>_Aj+KL-dDo6-q&IyRM3Vtl zU6Y4%0zY5B3a3h_CFR^*rw14cAhz554#zc6UOiEcHj1tR-a)J!uynF>Gtjm(L5vac zkXVJ}Py~5D=3bgQMWH~wV;yehqYQ&q*5boqKlP*5;s z`X$CJ`Am|30f|^+vYK=ms{$_?=mVJC$3(L1Ny~P_IR~dzTaL2&%qKA?v&>rSREbn1 zkzOFc&M>~dF3>-o5p){uFYMDUgU?T*?8t2ujbV>sTsYHiSGuKX-cIu3QDPS6oVyA4EfZW2Xu4$^yXXbD|MOyt_HljBV9W z6`249m?4$_7Z3xlgJsFO8%4&}bYl3;ZyYtwQ0-PxX`kA^+oQ_p*x74by-6~1385-` za4&r=N%(~UHR7s(Dk}VPdPzeDZiiDz89;xt4p`a7Tg6>H)D3wmCj|!yibe7T{AVh; z*4=`{Lh%R{UP?R~u#_Hh;B9SUj(aupz6921>-B58q3%Q7{#bHcIb^a=%!{q|0`7%`CQcJU~7Riz({dUF&@K;~-%)}AK|MpP z6Vq)quNDoPAyEd~Zbr-yWc;Z)i+Ff@&0EFP-0rD^+#qCOLB+7J0{)#VaJAHF?AKT} z(v`Yr>SbyflDqkG5@ggM7A>wpIw7u#q*V7aSJ^-QJIP#+3%@TSRBw}~2Sq{JXiSHN zCvYnL$RPDV$sdq;5H!BCyKVExK{i3sTToWE`yQkVVmeuft0<@iSmwbkZ&W0`8Hq}1 z8pY?Q4kVmBAl-6C3703W%N+{L$2-ptYO!Xr_!s~_mYIKk#TD0f#l(r)50*1O zT~}6fshz-2@bN`%=&ax6Q3Rtco!>Xw+yDk&7V_`#v@)#s*R1XPkO;Kw|0ka~6a zdfJPaG8moV6TDf9k{=LetjpsNUZc}^*~h?omwZo}fmCQuOonx^b(n-}IZ3?t4W_#PZ236ID--qTq5GeclbvmU%r!C#T|19f7bM={LI z<$K@Ay!9H!DU!u7g?@d<%}CWobKJz-j;*zV=OZy49x4J6K894zlL`2^25M^|_z#AL zXRIxR;0&gwh`h+Me|Am;a4OM@*YSZ%LB0eoh2dUNAF~gb%BmMX2lz)ubQF>z&k;|v zXuXMHT#4$qC6F(|-5iTQ5?njvOXssIn6VZBhjT-nLXa_9J10)*#OMc(E~FW4_y!tr zpyow~JQ9{b<=G(42t7}_U*5Jis{Ng*(?eYKObubVVF;gk1;H1)`_hAs*i5FhyV1qL 
zn_mH!s86VWez=1m?V;$Vt0F!bK8UlrJ+X$$yoR+V$RpVdzGVrSVUrMb0r)I=BJkO% z_;ZL~1d55oZ&JGEJ7*n_=(lfD$}1Lk%(0H%06I0>{Em<8P@p2|9wmtwi94%en3joo zs5BV`Jf6IO|8BL{_3tX)rCp({-nhh}lkUihBo@j<`rW%CNRvD3+-zQN=HxCtvKuP| zNIYrR(!Tx^zCmRB+hK=BhiGvJBknGgf?KLqy8EO(XPvTw#;&~3B2aSu>7@gR1*ApI z0LrjP!rn1=%VhYywzo8Vfkez_K2wE(bANl+7!(j-Sw4~|2#VgPke%2TlsM#>2O zLM}42U(mDn^%}D32eRO)0Fs^#4_|RAO#u$wk7Qv?pvUbXdt{J;J3n6>YPP3zAc%2| zPvr-S$1_O%i!FnFDWk38P|nv@7)5NtM)P?EpeFjkip85!G?Z>Kt`3TKiU>k@Ntcr2 z#P?Bns)Ks){v6ddC*TseBo`@*_fg`m*AQz7*N~vkU=p*%bz-r|l&0E^;EHG2hogJ7 zCu*dN>lLXcfPHZSc%61JbC4yDBXEzmnAxoc&$#U`**7>xwezv8^?kb+LEiUk*vCQ< z7L||Hhfe6z;xo~-EvoBw=Vec1^%8ZRv&%|J+Be~9bP{&_y^J(7RzC_{lIY+z4=tj@ z<}I-`VGYH;h+>$^M(_cWr_3@9AZT<{dA$!Xh+&&#MKY6opZk-mKsA(SpLEx<$y^Cn z4gkx||C00p3n8eH*|2aioZK-IBa-L-fWcVn}SELDwx)Jllb2CHe3m@i&x>cGr9Ixs~!M zOG^|wxxkH`PTJTw$Vx6q7Ax79yy+6I=BgXb-)k6Y82cgezic&j=wqQLOON1tK{+=X zpWj+L2-Kss&cf)H4VjJEQG?~4_z1!Cfu8!z!_~*+8S%dTn}^P&d(*_}T)uaQKEDMB z0M~w`LHBpvNQK~#Louu+Jzk=+1pSQ(JmX9iy~{1i%Eh*0F-nab-tJ2*b{NC1GBZkm z<5WTuPy?R>lK%5c)Rw5S8C1f%69VqqvsTC+|9xOtHLX(Gm(+n1R|+kgDIR!cZe^SRw}7d z;1&em1-gDV6g*@e4JNquZCras|!I3mmu2_8wnNe^b(RX!YgJmR@kpN_+ke zN`AvRg&|j zlt6_`N3vKGh+P?G>H$^=Hk26yRz|@`CzS8?a?UqmvhMU)n#Q*q&hVAJM7=7`g@9pe z89^<=G(sm_Xlz7mRswoTyYz60oQcfIC5`WJn*c#XDC%LR1XncX@lk5zthKr8aWR6g z*hz(MArpKerN|aCl=H|}N;ULiw!VkJdB6UT&f3!vDrVG_N30uZJ*3FGavst7@RE(% zQ3-P_&_?8bq2tAqnG~n{@01>-qa3GMUVkVib@76t>i+aY#M?422j6bHc9ILyvS*B> zQQ;hTorEx+5%Ejntqj?MpK@L-A>*grn3}Xmf~eL9A<3fu@V^M${v%Mb`npo{-kWab zY$g4;waJ-CY5_)}&t6?C)$H8ON*&Z{gA*WkD2AnI$WqGr+dDx4Jha4IECI7ORlX%xLkM2S>PMcfQAoTHXiHgre$Ng``C+UO#Tf z%h)nwFM(vfd1`y)$+e<9#vF(0WB#2seWeOrC8+#Sznrt;aTFq+VHge(W zrLULV-9kwxSkZvb=A>{4q$?@Los{c>y!(<4Z}}x7H_1eA)Vm2%hAVvAq&Gr=X3qss z%ZI$*`HOR832P|h_`UCt@YeCB?vDk`1ijIFpj0~S;5t0+y?on^xUzWvD01NIzw-6X zg!GOMi0ue9#H92NEiey6Cu+B^icR#ZYNp@eiUFO?Nfr7Ruph>k>z8L==o+C44y|SzJlM0I*>xbKB8ipr}PC$Vq1>q1lcQUVmYSy6QkL>A*e-!H* zE^(h_rDTROBbAFN7eq_a_1wd0CwYNzI#a@`n-!AuwhhFxQXr+>8N&+;k^;lb@8IM0MP++-^ot&?qrdT% 
z@mt^g{?3Z;HrZm^T9}sx)ecIrLxK@CD-D*|m9|IDBSIvWPqVHyJ{kM@xVB3677f>}YM!uoen+4Oz@ixxU4lLhmdnA5_Cq zn!eQCP6VBdu#5-q++!n15F&4}luzs{UuR55zOLgFrsna*>NC!J?Cp@C$r2nxuAoQ6_@4>i!6BY@q3nq~DerN>eBtm6*u#Q`uY>m(|fJDWc zpd*|pqn5K+7*%^nTL*KYS_V1t6%vq`ecJ&{84B}oF zCzG?le%RKJAo5Za*j|fNy}S>y9=!0XA^r$uwZD_MT)i18>}k80A($6~-0{+6T>DhH z))3w`G*u{EYE@%Bnl`c);H`-I_l(mxT>~H9CT$R>H^+UeV*&En!Rqu z{b+UcK~w&8PUYTj?1*4Qo4e_xVehcV!aJ`ri#6`$VfW$Z)xp#{#z~hsQAf`=ZCNL{JQMT4Pss0(=nZcMfFg6F79R(b&tT1 zA~R(|O243sb%AyG9^}`bKkgKq*>=nPf)x~SUzz6ij(RZ7+V`Tx0@d|mcE1L^^tM(30<+-Ybq|(J5AS4>HfrK@Y`q@59{K__?e~yDbZ00uR4!EC zK}u!5t72Q@REmf9ef}1&kj+`|1rPau?7e4HlUdsanh*gM31(1`t^q3`BE740X(C8( zf|Sr1y0oC9L=jL>G*p!)T?FYRDvCHXZnnRnjr=lOB|ob|57xYj`O zWbb?5^}4R-38Z$((qB)7V9+n0$aFMEeQ3{fU#qo3_PqRNMFA z+mIO;DeNRuXq#nNM>g2p!EhP5+yZt`TKvYcFAdvl2^aO>6{`>Es$(9i$VwZI9x%cL^DDq&w!wZHVY@rTt4He7fwF#RS8R^g+Jz4D!L3?-Z z9dH74_Tt{(c+R0|+aQbc3s*js^&HpnxXbx%3Uu;HF2g9jB9Zr?t1Xs# zZwf2+@tb*)Uu1NK$i*-?^fw?-(Rt$&q5|}Jz(6%2)0z1N z1_ir6bCIqAAsY&4EmWTP*WW&knLS}7`D_fB1VRY#0DKv@wq)rQsCOsEVC^mQhC}+| z6M3xb<*m(wln%dw9mHkKf-)J<%0vi|%Ta$8U(eC`DBXvsF~Thxk?%OAXWs~npX zMXKF6Z>Mjq4)HWdS6$vRfb`CmE3F>GV0%XJ?=-XflZyT5ig7W3q(!6$fB^rpHcu*G zR=y*^nZ)<-g2&>|2ZW2!zo&(xD1ldzZj)yL^ahvQ(U8KFeS;A#QU~0bE2}CxP=~UEZeGb>GZtt_<_0+adtoS3|76ysOFl z<4KjITfRl%MyPA^`qKKJZ^UO;zA=RdQd45uhL@fQr_B$nxhvbfT7T;swYuQZy4wA% z*sFSdV!F#G42NXu)jeON~+46&oGO4OKty4qH9<`gONT2fq?Brwk@Dy1%4Xn6$o7H|-@B zUbhm}(EljB_)8F}VzzFHlq*^&f3Ao1KM8^x8ZcO5#QBBB1!9NNMLt+EOv^B8?fJnz znDH)&fu5+No~jCU1A#5@vGa-)O)k&jbY>5o$2^lDy|(~)GA8z}F%PWtq32NOaJbCY zuQZ3{Z%PP}M-t9cm^uDGd(X~I6oxFXukdheyj#~}UVm4cdt+{*MQ6MJ9It)qk`Avutm|Sj`T=M*4Orf*{AnkC3@K$(Fb&PCK0B zfB&9j*FP=rv4gmLENDw!_%tV04c7$DZEpXw?&{#D-&F`jq5tEhm)nO5{)M46M0USA zeo+*xg`zc)3Ohty?{|~f`4;b$2?rYv?Nmfp9;#NSiv*CQN$3_n@2q@j&{+NxpKdrk|T4B!q7kGqAUQ^Um+3c?TiYvGc(I|Pxd)q`geV-P%*}9k8NRsc67hn zox1?zjSwx0bo)C;_eXzY%dnGxdsnMpL_u&ZxO8hG()YC9yE)=#w?!9~+~fQPEaSG4 z0U2%~5Jm+XD#uM#-kQz`$#emOq1VM>Y~tvCt%8h5;r!+S8&86b*Zg`K zs!Wr#)ANgy-YSJUXGivI><=y-AB3ls-0SUZz@wYV{<>IOf>f&@B~` 
zOraol$Mb||9-v!R+IO4jcYOv`Xthai9oZegF7{_!xiFYn^PBo4sSOPtaF}+CqC^^p zk8IMQ6p{qyx7H8iOuniU0hZer#M@0Plsv&*F?dw$V=g_{lpO*VKlc(yy^?<`TBNMD z7(P{z1o1i3odzvZ{_HPmeG=Yv(z zG&Rb*@BA|lc$YaTOYjRHoBJ|q%u;&LmyfHj=u!-TaE8?sG4TWBrZe}{iXD6>Y*kac z{()=$m+n_`w@Yrhwr|QAqwMtjIa;bUXosRg56`>?&C^Alu%ocW(j!Wb=peZytkp)( ze`I8m^sPjSyWN7qOSv3uSl@d2W*sQPq(}ltu^sCh?&X%b%Ee0Jpy8_&x&fKeB=79B z3i2dNYsZh|r}Fj--$=8tvJ~jPTHpp}p70lv+L846@V8JQw;r0SqmllAXf(Trg$n(h zBszBjaB~7C5!fO(Erg_kOn7D9U&Ry2Y2+~<3@MH0>2(e5t+A0Yk^o}jA|LaUad^Zk zQ_z*IISaoyp$*$$Lnz}ib5BF%y$^UF;$M(*H9_d)GQCI#H-$n|6c?L>6hkh?#2z>U z4omN7UE4%eI=!tG#HE#Y$3jCfslr(cpbj8Dq>IQAKs5L`FH2F>BYg~Zf#^-Tra)% z=@J1CS#{<4sYT?@VF}j_2te-qQEq_bTZ#uSd5)nFU&;trMO<&eYxX zI`W>isK2l`l4clJmLPe8p2Y$kaliN}lHgCzKA-S#=JP&I^CC%s*<@z}{RN+2NRpmuyoO1HQC}1MS+=`35$?{)pKtvs^bdewD2FpzEg4Y%h9QDSWnflq)2Py9o1}&LGBVFH5 z+=g7nwB*7zFZMAUxeTZ^m-inuzdV-GHO=xEf|PMXLppO&YxJK+-f;mxEczEVM#Mmt ziQZ_kG-WV#0?D2%N6-=P6W&2~{l{vEQ^%fdW%g6XvenJ#9LCaG(AiFeXGZ^vX_iNs zeM{fKsZKdJCyIhJ7{c0BM28)J*!a4N0TQ?qCbdAv{p}gtzs!dOI68Jt**;QAa$TM< zR*ZMt+iS?WY{m42azsG;>T{Tqn9|NlA?A0~UZ^@MZ4}nhSN+Pl%92CinaZaK=DQjd zx?N6`5lC-Om9)D6q-cm9F4lfSdw#3rC$z*D2Y%;F+(Do#9#`-6Ki$2U@CMhUp37>? 
zdCvqfJhc@R^M+*x6j_nRmJwShx0zk{msmet;*!0yJjZ z^OevHa5dv7W+T>35hQwgSPc1sR907#5i2qlgn z-L3Ltr`*h_qnl%h23D(Ce4J2A3S0yk$DF3MU^cyza-m<`owd6}Bp*MpRw1x3>xJ?R zs2QDKsMue$1HD_HtI=WZX|1ySxr?RS(C*f4>RpX5A7Gv_1tj2ivgs6=WIN0Ie&dZw z+?Vkt=Eeue?((#nXD1I1GOZhIZA#mL)P-lPQxYVOa`?5V2!5THCgI{Oby_tXmG{=7 zLW)p5)!)v?-^^EW4?EUPYZF{s(!9WGC-c)ySAXGv``VMVfyi$30vV{-ch6#Lrl&3h8zDifr;DN{T$`-pJjzA=35p)qzM@u6C5B zvn{lWkL439j;Dd(HH*I7+^*?x23aKlW(|vz4)AQ*`Tdpo@p<~0`{3Y0kYaLAZfX^F zs&IIEY?a$;K?881qcV;7$xD?2X#nD3J<`I(;YFot{7!?hQKY*M!(jQKj$P!`?kKKGog$ zL0OY0YEue62=zO%wqNcd&;Ce&PaGc&~R94_S+sgt3Qd9#{5 z2p3_Qn3?oDgy>t##6Uk4N%GwmYc9KhZ`@lLf*SV3Miz)LQ}}=D-wopSi#vXDCoO#0 z0-6nILZO&~{78^nWZn#(Zv4DV?+XHowb@qxb0GS3{x--F4kR1TLVtbHtr8!61c7>& z`>QaC2MAcbxNQi8er=(co193H1MZ=8)*dqjc8fb$$qtmRpAe?VHt^eFUOp`D>c4x; z9f4-~w;UVfD>3z(iRrM3%%rs=w0X#*bXk3N+jdVtw_SYF><$Wn-m`>bm1_hd{N%NV5c{~@72EkUshoP z39lXH6Da(?>Fz1qwn7JYDQ`GOZ@KP>C5@#z70V^gFruJ z*DM)^3i=1o!YQ|4WeKpD#V;wMmgQr=vez7ZP0akZ|{^m50FmAt1NZYmOVMoek0eqao55eP|@B*30CAB~;WDi5c z=<_&9`TsI0NcxB@bZlc=;=U_L$y?R$FG;mtL|obd9$O3?!1!+pfqvQ{zTOWOE`1|r^Y?p%q;wOHM0FKhPQOIr1V_SI(vR;V z-|XQSAT^?_ND6pOgtJ82VwXGkKg<^uB6mJuZc_tJv!+djHIF< z*~rd^62?5lT#K#FI&_j@NK#z7-nHHK^Q1qLpRJZxmQTz1w7c5io?5=;WpJ4B?+3I? 
z{>x1c0a__WsRVyl1cit&E~#@YJ~Pq+)(V?7YtnWIm)wuDa81JQsGNxD@bXK#Y= zLY_=Mz>|JE_MS1@e}w{+OkUPd7<&;iDCogYSF6S^Y5^-G4V~__?}fa{X77Rx=AzjL3{* z(rQuQ%-8X~Mjn=~qwmsNNZ*W;L+J++$exy|^j}3x3Pfhj_&Bw|J(MnR7%^}dCnI1H zZcIHYKHMQ%`okixMS0|n@&KfsXuL08B9ifyT#YX~e8aaCC8>JD=Dh34f2&qriHKfe zd&-P5g1%R0`d-UnuWUhwX-;+xl$5^byg}zvnT=t+=P3Rpw@49L=X@#B17no3Ddgof zDZ|c!-%9Vl&v)?x&`_K~DCpUgU&%>0%5^)XgT_#PHG(MunZo)*;cKAEE-O_jitl0X z0q~o<>_RW^{d<<6)p!W>_wE!ljKI3ytwYNUv~@Uhto)S=Ku03Iu{2$ZF$K>R&gm98 zy;dVjDDaW-#$p2O(XmS`gGO}%$YPn@4%bgxdBXz)+hXwdfTdtzctf8VK zudgmOFzX^!kUjHWZn@=)?stwq$ejF5ko~<7C&QdxM%hSLn)9AVoZRzghIdrMLMkA4 z{h#Uw%|krE(`;a~52WM915Q|XL@QERa<^uX_QipJb2?lVxtbh-M(Q)9os-&mzx7v* zHKP^63VMP1@WkPVLb$~L^I|C=WhPKM_zb+}(rg^V;$X8(9e*Wv<$>jEJYhn)EB_`9 zC}2rbUPudFsbyaw+9|uO8PDU-V_ly+4@y_eGMFM|>WE|=E`Cm_n?0s&^&jil)&!8e?dt08aq4{ z;RQy){oKyv`4K;#YPil+iVAh&rEQXD_3-7w>ENEZ4dX%!)?4$%!RBsC)8XMu{yN)U zfGpX*Z|T zPr}cjftA;s&;EQ-0h+GYg&*8yxQY5-6GPJ4d-V6QWfK3uOrV@iCmxlP*A2S4eqT$H zPyc_1^gmbKc833-2CRuO^Y6g1&?m0&Qslbje!G6Aw$K0ak#_w5`>g!__7q|}5ADO0 zFqrL9TT?G5bq&v5&^4QOjGVoUJoy{%ni@$P6*;?SRlkhlnd9JqUi|z2PFKI-%w<@tv zPd@qGNZqJzvLPi=S7gRFu5J$7zA3+G@cge2S(uVtl`Nk>D-A2p{wyAx^B`TdtLwc2 zAGA>IUx{+*jav(d8~%~j+u8HI$FD8l{Of|fn$Yvea^*nhrD;q2R=3hH!2+_#C}h zJ(Lqd-dL4!VXUuWe60k}xflDz7%29uMK9rz$|ZaD!bX{;P|g94J+nJD1*qW(D*==q z@)yYuDmCHOl4i~2>08Q!w!_Xtw#DOVMylf~b^~o!sT0aFXRmBJ2o7-Eg17NiqADLO z4uxAf4at2_DKf&)BTJUk_uzKm^U71p?a zPn+PcSlQ5v7izh-H7~IBQ=pqG^E9SJ!26WC!9s>v>yijD^n`1HVr`V$gf zVtR-7-zL=cl}~%|4S1d|cCgGXZe#q+T4q1oQpQ7WcwG0+>peM{#h3MC?O-}{KwQ<0 zQ513L+)rx1u8j6yCdZZ!dkfAeZD??2elo?BFwgi0?3`JhS^@|0zg&jX?q8ehCM`t@ zAWUnzLMpMNp_Cs}yhgv4h$|^md*i7^YwoEH*mlOaf3yG{E@D*W5l_#tS0>b82|vwb z^tk6=LzC|q77W!27~laU4E(yntGpMuT_>ecrpVz8YeAcP#EMTE@rh0~v*EteQ`0VP z@L#%lSj7X+QtKu7D!u(4d`?Q-mCvtulw2$b2-E|$unEr-q{IBw?&*zg7^L&)#vZeWjtXq=4*wHf-7mKQi zWBqO(dsu3lJ%netf1K`Td#Wo>zakzqQj3tde8 z*)>&@Mfz085sq+x?O1!Ue*MQ$og6K(Pg7U&Z}e#mn9m=kG!ug@efVRnWVn|Pmg~6? 
zh5t1BVP08S#?bCq*1P=J>!fDm*)sJJO@^k90Ojd4Pvprb8)Pw`re_>^$@l#kXpgAD z?(j`?RqV^BX>$EikBYnbp)yA570 zr>kdu3>edNg==t4;~!Ph5z0hjAjd?<@2Ia!NW6CFW<+2=ZMUlm|b zPA8hDIL|}Ew;;elWJES~-d1MrIdY=(mUQs1=`vh~%CXiA>|n=04$>gw!Ok*3=?oZV zTgk$CZ(IWWjLP*HHt#O-c{@6$aRmr_%niORYvJQqhKitOwrp6euGa2JQ%)@R4sh{! zYFN!wiC88iM3nJXrxs~;#NLogGNJjLHjRlP#DdRy)ofima*n} zvK1ao;sj&Wf__Ca#Rr_ur_!#6(-PMdZhak6;%M$fKw{mBVm#HB>7<&PtLP6$X{Xd>M*`^9z54JQ+%{9~cl zkG^2RLFY+!oRLm!Q|=p<5~8%nyu%*5{$~NUOVzUM%Mv2&x}7s5%gBaia!mK-PhJ=w z1o*p=H@io=J7c}(0Ab++xi&)<1~zV47L}~Zz#J+BX51m+yXn^?QW2gC@w^2 zY(5HHToQD78b5A_8_NA2!>FNi!hW9chHu>u z#3qicr2Lgt{iI(jBqBV-X~;Omai7Tg9$PiVlz=*MjtQr6(e6V|m(&!wgP00srvp>6 z2Go+JSL?~^8a^DNU?ijsLXpkPh}T(Hnql^7m{-MehV69rxAfhLJV8M|yfghAlsPc@ z$y_v;{N{((aUazn>fZKtan1HEmcq9nixO8JOdU+BFQUwOQw+-n3pFY4spcg&On!vu zISp0vZjn6W+i+z|;eihqx~@`K%M5>7-Y#{yg|)Y($dSxttvwRmpXg1<%SaR#qa~WR z7r%a-MupNcPa)Ul$v28j#x{h?d$@i$`x58Wf0cqFg-FcxjEeNLVoEq^SV~)D_gkNL z?kJi?f1^M{LyTEP>!y3ZbC1nST$z?k$B~-N>rJ-j<1=qOr)A_$1#G5h^Mr+6@!=IM zp2j)cQ2Aj-?d#UNCCj}W2EpbcvP`ZPg>QE5bsG0z*$~idR4yBIG|1XmdV0C?R=M{& z=IG`g12tZQ8u1m`(6}^5OI!lR<{`1yIEtvw5q`7nbsUEs}=Bp3=z|wfA#IfFf+t4@w4zoMPGIB=cI8{JZeth)f)NBp1vxL zWPk2`>PjzSBIr#PJ@W#5$g6n7%e|aI0BQ5hP`SDJbsH__Ic@GPx%P#8<6VFCbkmL_ zLTVmK9W_ir>@**i?;CqD&YjtjRLfL#g@yf;K4P%1|6V;4wG@V02H8CM=p#-Nv?UVH z>AA(JQ-F#!KnAz3);^fi5wO8)!mHU8$n4=mwbozfN~G4#j^R}v&+Ps6Ua2LJl~%wv zs5=$Qc2#<2`549nfrDhFw<3yY8~0-pC&5>yyy*h#c0YAR{4j zDGF{(+9=iZN3^67%=Tl%c7!up;T_XU`jg9#Ej1HlYlQK+Iaqr=wG=K)I>RR4@(G%v z$lCWbG^(Bl6Va#l>kwA|4gv03*|UB)G0dqWm(*5mT{jg;FvDQlGU}*9*Bk|2ouE7@ zb1Ux3d!yC4*DpOsj42$sempU3)rHUFHcdsdk)-OqW6_Jmk-ap>k5B+?-Sb^ z!vv~{o@WyreMov+CRi)7*itS1#8hV+F$pP!pSg^FNL#u7H*qb}O2H)cepcSz?IoQ%z|9*8RMna?K# z*~k!0*#E#B?JSZ>kD6{$_1{NxU}1Q}fr$#34%phkYo4l^wa9q#4#CS}9BxOQTe+dv z{FLYDDoa1XHt>;~CK_>)KoC&>%l1OFTH}!QhlylTL%U6BlY^mZwC|3YmxVrZT+7;O zyheOi+_9s0!ZNovM&GWhC{-(&yzqMJV0`vFskFg%J(;?@(%tiqrxY!ozbxyV6B9Wt z(m*id!puJLm@+d>)zm3ZGT9qs8@TT|6BfI8hfu+wl3h5U?HkJ&-mkX~LG-7%mQL4T 
zQ1v5e{ZGmD{d}5;;W)w^_OSKDC;j2V7f6X`Gd34HU|62lx$}rjkUo<7@l!vsSr0mY$dG4MDVJ&B&`1$u^U3 z6EbR2gSVKhv_cB2FC4^GLDAaP8n?6f_|Z(u`whIZoMz_98mItX56;@$ zCCuR?kVCt8EqJv%D^vD_e&}rb^*xq$Rl>tLCreMX!kOV=NaI7v1go(={lLF^Uv@wkzog!noOd09YSLF6 z{GC^iM>N#T|L&n%1P8%UmNM%zZH$W|u|l}oSmt`WNoO3^en0InKk z5E6}|XE6cPX03emCrykGH#EpUo&O-&`e`=0?Z(<)QVV4YVwth9O8Tn{&vuE9mv&u- zXIu#sB9wbX)h4NFH`E2RH$_sTZ#bHYjV2A9!lpF7y~X_e0%NHV?!|ii(cHF0dQNhndjf|DKDv5Rs=sOp8Qz*Jrk`aU zvZ7?zX{Xk>8#vjY-R7CfQ%BZnv*e?gMhY=LSN#d*?3klnv0g(JaNWtmqC%Z`$pNeP z4?YD7N4BXqMnYU_vjiZ#ZTt!Kw+Rghs|%*XDd^D0^1$`S7CjEac-#APwuU##Q`HLl#CLB~`Q6IU24W$s_hX!q03FQDXNY*myZQNY9_ZAAH!` z#eXMGCWQxMP=G$9JrGSW!u!Ii!6TUw=^1?YPKx&2lf_&y#Ewe#XRFWFJ@Xpk$Ap;D zeXF@ST3=gru9aYB3cgTg*EYAoQ{AyFmfw14CR+*5f*pMiRY5XGfn)BRRaZO}02_qu zQ@)s!qTR&7661b7jE|zni@0>dsSGKzM~OnX@t~38I2V(V4x4yMHQ`7ZV|YT8OuKS( z<`Xk@h0a&-gv_m$b#8+|weRZ7<|hVK)qWsDZwClfmsz#?j=iJaBD}%3A^2}(D=Xv+ z8|cJma*5AHpN)6wyhZnio!Xt(QaLagN*^J`GOw?9?w!#Y$Q9AkA+)|Ji*= z($@sV8F1tDjGEdHW-HybrNaw-{G@3(v-r$!V>hZZpImjTs4o_tKe#}qsElUZn;cl_y%G8Lz9Vi@>N;duc^IZ`!T{UT@#Cit=O6Dgw$Vx z+MPcjt>T`+19a%2RA!eGkl?9t7`wP5%{*1r;;D>U<2^X47y}u(R~e~UeBW&ZI4SbQ zN2aAPJ{SB6X;&4z{ov>#Z7$K-5>d8>o#(kZ@{+7%>=d z_nCH);-9tq&5ZcYO(jzwWkuubz>&upX=$)L@N86DVe28$*T*p><=|S{7S-l9d*UV6 z78?h5!g)By(#+oFTw=bP2sa$(L$ctGgC5u<<$<4p*xL`-q%_FPlii0_pC$)g*>zqO zvR2o2AWzaAueucbIyEg`QY1gJD^FEdXGzZA-5-tPs*}W`bHQwj{#`mlHmRyp83fsV zK_33-nAT)L`+4@P>b4xVk$tAD1EqN=ID)KFFxmIT9j!&+tQOBFjF}%j9mCa3WSV10 zYj8;1H%g(-K!CA>wH;A$eV@u^SZj;q)!1qeGtXDq~kvv0gV*iAdi3R#F!zkJ?U8OqRQqQ-X>i4sZMli z5-_DB>pBuf;AI+awv_v@;Y>z_m-3|hl@9u)Q~sEe7ZCVF|06RsX_>)T`v}UVZaGO; zxB>Z6)Uo+PBsaU3-Myad~w8Q;l-^fV9463{Pdd4@szWw2|-`QjHMqEIOKi0z-d38>FgH zI^$?_UE9JF^HY{Wfm|`H)~HST06yB9oR88qQY&B@x{#`xglFG2@msoyO+Y|WylrB3 zc**DLDMaUAGrMZ5;o5fPJwY}X_|8C8-P!LakSi8@i!cTs)!~MSrY5P!ZBua^Uh4h` zZ3_(ph9MKWb03Uj+$@rB+%aRycP8mtDR$H!z!GR|-f&{3@ecXEWV{}U$xh-fDUn*j zK%kcFHoM-nGitPoRAA6#<(Z66j$9ZV)2D1rZ*CqbuS0LfQG;#!7uKq&z2Y+@)VwQ& zr~A+mQ7oDPN@JAdAdBieaHvr|M)9@}$eJe8jAYi245X7`AedDnH{_rsR<73{Ft 
zQClES$Rv8pmr517T$E<$4TwwcIL|7BszC5?{OQrH7i+F6EKgPne;82E|ElcQb(Ms3 zn3AKkyeVFkwUW0dfnNw_6zig^v)I!{yJwjJGEEIU&rg=z9VNSWGiKAb( zx2owQHCU>?K9AQH(q5SzTOsv1~I44Tt zBi$s|`aaoeCzz@7RMZ+Q)uJl}Jbq3;rWS{>o@{uDDT#0@Dn>sY>f8C{vl1wZV-~p^ zt_Qr4#mU4a)I3}^>Ga%J%t-B0Ds~*04kEK$RegSIaZ`E8cdM7Wf-c`6%XFP*5S~!F z7NGmef+ABh5f*tpvMzh|=z%EnG+M@uJ_dL!7uy%znv*pzL);&Qz_~=PL}@mjpi>XS z)HJr7x?(q64?_-O%ZyL0(=PP_-(zNJ#Ke_xyV$%|axz|q!Rrn{dK2xQcWcH=)CPTA zfieGniw8g9JZ_qIo&1nk!4UBW#*2AKPhr#ZPU{`QL(xU~x+a_J)5;0uk&9toU13r@B00EZpko8UFOVd0xxk&bV z#AN(*bb_sr-4tjbJp$F+|Uqx*wR1`6XwHh)5w;7&Ds}kk^@LF@(=- zLCW47LwBujPmKBr;3FRIA9*4FzV*uZCCtwY6g?74{@lLL?0_uROdV{_3K8(FZu_N0 z;qDJ?ux;=0=tWIGy!b~np}VXlh`GJTg?}fnhGkCI>w-eMaDk)gb(~XQYFTnU98vK{ z@=!|tAfNv!8HP1$1f~QX#7hh6XT9QeTC`OVA7>oZZd^zwADFD9Z~fU8>ol%tGyiXUgtT(~&1r-!XWbp08r9+!MsSewDnmi)7?-7%BSYa#=)i zl1Bw9J*&>Cp?kmg)k6|82-_e0zFpkux*^sAQI4&H{#I(dkBAB)bvjAUo-`QnHC6WJ zDjNu|%{vX+AFuB0)WMFzEtd>ph{b$8DUm^qk_pp^V*#ABA5U~^r+QXx_F-(9mCq(~ zC?1(OrFui4ALHn-X=QP{rOdzDtLtNdss8AksDw|(aZ&aizR9-dp@ zHtU?*hq{qas7Hfd<(a?WTITQ;pUIwm`8JqCM8YfkLBvq^%!Q!S9uMa~2g;rNgy}{~ z?w4slveW85(bP1G*fn@+Q-s#%$|AQ)~@l%xesl7>r!m{+4fk>Sra<81_{JXe^S!yeifB19Lu(jq@!XfQLTQnfBdV+%Ko4`<5c0 zS!U<)jni|N!fe)?$t}c{59Cl0j{=$|&T+qJD-%VEv_&LFe0unzZSk~km{UEbQj8|q zuP-svkIF;2U{50XAJMaV@t&+7rFG<5QHf!ub+kv$tX-T15vGUNvq|wBQ;(()>Yh#- z&B}9*7Vxc_gv>YGH-kNEr ze!rZxE8_xJ#-oIrl@;QNz5Z^52ECd%ic?0&pXaKCv{B!%1vkH6r7V>3Y%aBsIj@W# z4pVLl$vr*yksQhaw6u!Om%?AW(YOjS!FS3mxTtXr}m1ug2+2R?w_J~CK`4GN{ zEcrIl7j5CyFESEaK(ZF2%M?!gx;cdlNeG-#_~U!el&(^sw?9HOUjf$?ujs^a`?K$h z6@~Ks5z-u*Q~6=|F?xfWQsVg~C_v_tr)0GrhHXb0Vo!OyxHiUTpLdXdEViQaqp}Pn zh`gfb^G$c$#@ccR{ZgJNH#gtz)nkEl zdMipAMzFmn4^MNiT(22vFeU~YIVm#&ow@EX^W{l5vP`51v3W{jc37{r-T^Y~8HHFG) z1U12miNv<|WY+G>iK&=7@&;@(d>Dt?4jDJkth#_+Q)VKR*25AJ>C!Vf zrVi5~lv|jRr_f!$k}Z%wb5F*U`TKS9dZ?4~Bk7;c^O|AmjjoikDPG2M1_R<-*6_iO z=MG6izm9ez6J5Tq9G!eanspr4jJj$yw|IJvhjcx%LY&HKLD%DP9|O84BR_JYJ(e1* z5!g{UtXKyIpDId7h!cED9;ecr@vKzVEo%CyK(2qTWUrT{ua&=1aMyp*4 
zd$CPAF})ZqezxriUx)(JQH{o=sl7VhSN-w))m%ZkPMaSt#8NE?p4X;XT#U{SS_wmP zc>6kT6&o4H!1jU6>^MHmw(>m@H&hdN!zM7ka?s95D%cqq~v^98%h^79d!_a_bkh zUNJl4G=x-4D|ZO{p`V97h}-+lY&CUZ^y7<<^y;8t@pHlpZ(c_YI`5A^Uco_R6?I?IB4y zV%0rqQF~SRY?@B>&8b{jF8em|2aDBwg>8ATK8M;q8R1%7RVulb+qM@G0TblK2KC0F zGsY9gDAn(=FrpDxPE!K$OAHbc!B{Ms37y|6?g))|0ost_-;K<^;?i_gsb2n!L9L3! zR<*p^OV2;2Rg5QKQt02$qLNf5N>XrP53}*=aF#zk$I;KVA4>AdIO&M zltOQN4gd`=gllHcWF!zDo{n|c&@VtgKf^!4_z(e^vqNyUh~CGQ39DM~={yz69CX1+ zSw$jMS_0TYLrPb--0he=k;i@UMB5$p7gIAnAFzERX7{D0PITRcqmH36OJ41Xl1r1I zR%h#-g}I1=2&&@)Vz2Y~SY#B^)Of=DsY(I*l%v-(^o1X_QwXEyUNTjAOVugNCeUv2 z(K5*?#1A-~2SgbLnM-tYRLgLLOYRCpKvLDbGXAprvIbR-ls&cg#yc~Pc6YfbE^fny zof$Q|;)ycI2+P!r_V3~2NnymQT}Q=~U&!M#Z7j|C9>3r0MmlLbfwYGWm1dmkC|->V zq6gPg9tqzc$)A+p%l7=xe};aZ3|@7?orP9LuFaR%Vq>k-Jpm+t$_)Y0iZ;|z8=1|7 zxudv^d$VSW_r|+w?*gul86oaD?Dw>8PNQbCFWy`t8}7S&7iQDZhSwlck}8O z6JBDL6Ud>Vrl$=fYIhx&rrvpmJ}7z}8u??AACpyx!OXOKQ@rZHekLlV|J3(Tz+yF4 z(oX92UFZ#ZS8eE^+htYe<{;lXee&H}m~#jxMAfx1ti8HFA%qB94hry^?#hm*x2k#M85cQ_18Sfu83wRZxFElQB zv=mU^*T+Gp{DMI?8|v2(x7)}>JK=MK{9jicjy3U%Mn(LHRoov+>{ID+GoV0Dsqx-1 zret%KdaSs_Q};7p=_WUjWGHZMorQ49lR?%!3%yZ!sC}U3bb5OO%z9^GoQ)XF=dI@- z%K)ALH&XQI=I(L$N*P2(pU^;|HUGx@B+`!?VQ9aq7Uy{|I(OsTt523_cz2~ z4}ePnR7`_o)OJ$NU;8tUznPpLIi2Ub@4(#a5~pV8eAGU!kQIkJDuKflQeCI#uD)C8 zX9~Iis88Jks1Y!B+iZQW1aI{-athCvS8EH+l_%{}ER!dlRLp~d-9mDsgQ`L}9N>dv zK4<;$3=V^5i@6er6$sNy6UR<1zHW1dT^(3!vl{dE^v7fAppJ&!GdD$tA+<6#TIbZ> z6HD*m10;J!=jy5o;Og^Xonswzzsk4tg52JTXZ07?>U?c|k?%}rd}x;ZnMgz19KWtU zPkLs_*d)5~o}N4LF8IoDHQ{qU9EA-uT8}^H3yxCY+5k}EkFTEd$JbBp=9Dp+c=Vh; zz55gRZV^y;ahnX!9Yfo!k+#l+-hP~75fe2JE$*nl+##|O?vHp-Nspvk%V%KW`YWJD zaY&<~X0*D*=m70m$uNx1!RO`jP4~a-3QFmXwYK21+r28UJ^}kt9|Gv-w`u6msUYB0 zk-`84T9g4qb75DsO{e?nvFDI7kN_eDT=~HsjzI4?8SqE{v6x(U+5=CI^B@ibv(Vb7 z5DGu1Ei27^;b+PB!!3`=wW&V*6{~na#qRJ- zakqUwx=xn+W4XR5B&PMzM<&~Rbi0GYqaRm;{PC#=j3zx>vxsujP4B@;V%|Xj2#L0S zt|yx>cNA1Tk;iAZrdLVUoGd_JASHi>H99~OBe!?iN3B*CYgNh;i=a z^V2$7rSv!!ZLjEfT{sW52flVqZ}>otLAbPpwHSnB(jkuUOw0%JKx9GdAza61l|25u 
zY@c?LwQVj~;kZiY*FC{8P1C8V3P~pGD>yYp39N|;bLWf6X^dt9ZDtCAqK(0!wO4G7 zH<>N3=t3Mp{1qP(W74)rsey*!o`Q*_8?^xa_>avmS$zxLaIWuy9Mn6Dh?*8BRpW(h7=uXtJe5!@KbsTFN>7hJ7ekRv{E;jQ)@iEt0sR= zTc&BRJ#7kx6}F`9hmPz71^`>eAKD?J9!De25IN<#H6z3kGtR*FEKOO&$%P0 zop(NxMKL~7-n#yY$NP)vg=Q5FD^fl?B!v@W%XBhxuzlg!?BxSGeRePinD9NFJ?(3x z_T{6ytuUV%pPhn3Fl+0I)o;tFK*a~5LdF|0)BH1>D zI>hv)UkS$7=Gulp2d5(2Q31#JS|1VCUht_CBd~1`Q}LqLZFFpV56`cC&N&exE$1n> z2FHp(5{IFEBm1#FZg}^nNOJM)V4zZ|^|Cn^X8QEp_|!p6$<1-Um`P>p`ytavKv%W z7z`#1#`2!y{yl%apXc*=|9+om{?dKVbzbLroX7D!j^q11=enqwC!7|FZpigz>X->P zbM`&hdCmG01>iUXz)sgsX^c#klmBTu!(`6n$A9I$N(nU&3pvIPxD*duDv0za2hgN3D7nI_>%iy)1;5!o^ z17`Cij-C-$OLI5uc^esFoMmt{w{uVN+v`FH*aNzyS3-8|*rfH~tsXVIB1fg1chi$) zRZ>zX3UeSLzBx`*R%YiBCH$pMDzGPSAX7&x6G|!@-9&N)0~*Fr?aTsMk>-H1S;-SjHbqTeV(cKSXDXq;=^< zN6!>z%KEQ+rZf5?rl#w&SZm5smMD=-2kMQs@RwcghBk8-Goyj@Xtiw9kQ}VxCr*m1 zf;0nkxT<3Msfndel)*8YR&=(T9e4vWh$qe~fY7_jmTni(>N=w5M0_@Q>U|fNUwJ#m zwMCsg*qTsi>)>bdp4tG~C*f`>I{|X%*aaZxolcQll${g-)Of?eL1%D^(qZ5C^nKs6 zqpi(TZtk~x2%fWHaJ0n9ViuH{nF*Ea`Ut2$Pud>Qm6eUo&;Z2 zA=)$^0a{NYpq=<%P;O!P0EzK0_KXom-Swa8-i9B*Eh4_#8E8@qZyy`5*R!d&vqd@H z^ZGXinBeSFW^{Ch;`f`3(%Y2Ds)|<|d)Vo5A}cad6Az*z?`zCF+6J2I)~ydbYnsCW z>)H6mW-e{%E(hv)wS!T>u#Ipq{BU0Jdw`Vhb>H0QyUn{#_Sh?I^maP<%t6TXqwy4t zP4`c63ZlDk`}EJqiiG_ckhc4+%$)z=kkr)MXf36Ep`PGp7hCEUguz6WRe_~{-GArn zbX{WIviM|M^xWN_=~wPYKhc`H{KN9{-0p8TCwH!YfGLhQJ)IpL zeL^HbRNQJv>PujD!{Pi2pF8(&uKtqEz}E=WEtjtVo$X>#VeM}zN2h@Ghm-Xpenf1F z2)`fwTk(5-&)J`~mYNSsNa^1P!cDf3OVo2Gb^{YR?CA^<~Y;ON$do+^k6d zn#GM;&$RA?dvo0lz{q|JFJGp9ZZKttXZc^>XDR9?iu)(JUh#Wz^sXN=twR5cwMgi% zotEi)nCKM^95?Bz)8eHZ7ip=d7+@b@9Q-<>}dfeOYA_?lK&GOL>w|OW4;UJj4$oLydY- z;6!8YbC**8EjHN7kxRNKdYk{{9`1augoYA$Kj#>N8GA5t{Q7EC*Mg0CapuF#+k6qH zp1Z;ajhNOvRbIUHGU3?nqLSL*Kp0FymTuqQ_dQw4_iVt^4T5F>Mc2&c66IHa->Ko% zAsu;4NovY+@CZ04K_9#Wc-mvzK|%#ms`bJnA}DKg7;eBi9B`sdPU=Wpw9S=LIiW*8 z;%vaJyqe>uQ`JnRmR7py{9)q>n*Q{}FXyn1W0*sSOr7qzo0vb?dpdfTsvkvwI|E8W1}5m~OXc=A$KO(zmlH|KMQ*reeo#2h0g5=&^HtWZE}16+__ zv7M}iCd|;#I9;r`92!)j5oOjrqT`B2;!uYJ$svu6%J4thu%vD(2x=YPW9xx;5q+mu 
zC$U(TFumKZM^6l5eBZz5^9A8x;Kkk+Z3uD{T?)?-9Xongp({9$6!-*mQaCkK9R<*GrDw9)NI9SedHW7mab1qNl#Oy&l_*f}n`;SCj8kgWOHgACPzi9@mMZY&EG@J3lGW;1= zeatsO4Ig27T{QR1ZTd6%5j<}i9b)8m^_>xVehEqX!5@;-cyHNzd~(RX)h*vp99TER z#`&ivp?w3M>`s(`AnWyfS4jLh9hbrmDyWWRtKpa54{s$^!=J@`LJYwP!|B~}HrZ<+ zM{#S5U$fJLRdrw=5s>kNk+59`vA{-f9rdpRv4j$0n6yigfQ zj0X0AWWDe^OXXf7!4~iYaI}($8cFErPUGIN(W@AbiZeM=M7}Mh{032?qoyhonNUD| z-#RC;_RQl{o+R)iFzLfL)S`8)2w+)vSAFE_GI(WH;L{B_#??`7I-d8UYwQ%S3BleZ zMn}G!6m`14-J)=T)JRbXt7r59|_aX)^!=OmS)lCjSJm~ z(tr0nVUx0MNYu3*T4Z!>z=6w;<2OK#?++|ax)1`KAG{ZXBBWwvP>xZDM^0_ZZ=?zM zur8LlC8Ox*S0=t+8~fqZ1+&Nk7rxS!DAjR!QOMC3gvJ+1w6=?nSl78#m-HPQhIBmm z!%+Th>5AGm))1yF9Bz(js<|+5g)pFj{g8U}n|LkR?QrI#KX@=91bO70IU+Viu-C_e zf2e$I-H1C8BLR7&aVz&ROVL66VU^7@B?$H^FqwSU9%(l@vBB)XYs$(i%yqfGy6NnMvv>Fj_EgFX?X;OefSKa3YP9|WP}p>z;9IqifD2<~4aFnw+P1zOX8r432NoY9 zSZ$`0;ZTI92vdn917}Se45Bs z3R93H5R76ZL?Fg&!LHM=t`7MK1TnS}2sRy$?1c+gF(y?hF)JvcenjcOujUAc45sk+ z=;{W?&YtP9{jVJ&r`&!6|3AJ!dvs<0b1 zsA-48LMnYL@?cX_wS*?_9s1NFvMUQ|I_uXnET<9Z1lPlMPuLuEcrrVwJ#OHsJD(SW zpG9{kzod+fb1Q&jP@5fd`s2#lN4>H-J9GOdDuWNZ^ZDN{&51z22H8R> zqkDR_#;VH*_Ajb*-MG}SWH@APV5J;n3-bf{9`}VF)c>HMFP>XjLLca4HRE~5@$!l6 z9K4r&vWHRPq$t!--75WUf>-bk>dmBs8HUgW$#5TzsRQ?EX-V5Uv0{s>LWvQmS%&aV zMfSfdHksuj8HNDe9YgkqsPux2XuYiyGyfbt@Re27$&$7DV}u*VS{mUlKSC$OHi<$E zE9=YOhCT!AG_d*6{UGPi4p8^<~?_e+@p*5S1GQPbk|w znVKQW5Xlt=nqKItgVjCC5Ix#3f{zakoVZC7yf%z->=l|hoBnDCITv7p1k($7>eIE*@ihUi@GMR=dn6| zR86yAD)8Ww-j=f>kfUiI3G7WEcra&vwTZ9%b;7l|2F}~?LY@s?{*ilOjR8eY=b!?f z`bP)t9oR(?a8El}GDc#JO}|tTDGO<%2?|?IfwJJTbY%QZyv{C(=@Il1S=3)BC!vwK z9V6dChIWtuu|Q*8~iq&)4YZpX)Kjr+|oe)HcF@|{2CJ!4$IalC+lq^*Xk4@ z@>PB3Gb6H!m{c+($&B8frX6qXlotoiTr-`x!Q3#kdDX9SD{lgaPzB+jnQ}bs?g&$yF3_Qbdcq0HW!HSN z14_M;2jdr6b8Q)-`M1K41DhU^5f+y@PHNW^V^JX7mPUkiM7{$-Pxa=ptB&GVj$t7n z1aSf4HL{jw@M%xk8W|cnnTlCAQe(d^ejSEH^`0a0y4bb%OTPYaRj?ySqE@&3`DBra zk+3_6*GhY`np&~J%i!B~ZLH2%36cD@tfdw%`4((s)XghNIf`Y7G&L4>s`gtFI$rN~ zVTS8h{u}sz%OWlaK<_X8=!7_s>mh*e81#H>i?*0nIEbHDBV2z 
zBa7~R)^BQ9PLzdV#>GigtLNy4Tg5i1eWD6c&q3l6NEj~ack}%t0Ip+@Ra=xpDUZ{W-12^mIG>ui0Y7 zP&68+EsTz+=4)Tng_YQEup6(0&=iA!ycuOQw2KYw4XN0W93HZ5VLJ_My^M9~_i>D& z_^z=x=m{CrUmq9lgFsuTM1lv0w?dGd{M~ey%a=C%36f+)z*L4!hSTZ5FI!k)Oz?k= zJ6Ry>UCVxw-FuOrj&IE{*gX3bSr}>AYyDD5K_}!i?2vh|IU0go8$R}_Yk$A@%KB2- z7RcI(5MTl)!~r`5f!U1aAo0ikx%Gxysl_xl9NOiPqr6DurvYbs6>^3U);qjarOB;m ztcDe)?s*QRBq~4ycl-%8VNVD4pvlayZ+L5DC;(6EXzjeI6dOX7Euk(}qOj+u1qi}D z5dQQj3$Vvr6UnlK8!Y8TG}9XZfLQEV-Y6k}GrUKo4)Gy<8xKu z%(};tU003M^T8q={EK%&4BfJ5TdGoOJK2ta(i3h_eeq?jmA7bPz1&}|+)6*lA&Qi8EHhnL|GxJ? zxcvz~rA<0=CyRK=8KT*b+Tc(GiB&*3k02a266^_A)lY{^S4Jv&M*W6C=XY((;wTb4 zyXqA(z$<9r9S5kuJYlVEorVPe30pmAQtMIxyRxa!{id$uSzZER=0`@PM?Ovg<@hk* z@S;}7=%Lg{?DJWiuPvwI-yi^#!XL7|)vq+C9o1?r8R4>gvpdNX2%_X|2|-U&O>H-S zA5_Vi{M`kxuU3eC(Fj(N>FwOjx9j})e%s*W*g4*B!i*asplA4~>@WyUF`N2;0&eAA zW?6=Rc48Z4@ih|sb$E~Q1p>yh{SdB2Wdv`S=+?}*(+xda{q@0D$}i15@KxLe@f*7i4;#* z&{k#s0KvOn@+r6%fj~OXs}p9-dYQS#7LkaUOL+U#%(>su)i#7`@9kFobp+!c0AWIfQz%1mj%Yypbm0p%Y@73JhcGEf#_p!+Rj8 zui&Ho2TGO@`Ei<9$Uwz+>JMnwOT?_vVpiLi7=PTylN26ITyOpKRrXX$JUcFn)0xG2 z*>csDI}Ggl&WjkK)5;2=EOLhBZjK$mFVm1p67iunf&ehXAe<`n{C)J1CW|pgUNtaA zLjhj@A^~&&YoUnV(#Ie7RQ7-#FfI!LxgG?u2BE#wCD_N0H>`5=yC^^b*$Dw0T7cMJ zA*At02nl!Yu%&?|?T%}Uz-Aeqd_ z0tbHX1TT6KDsmBB<28`Afof~~C@4q+yQJ`BGI4!i3Md!yHDa@nk6ejdq=>Rermpi< z29O6?-^qYegoYjiiyf9V65ln*YZ2j^kQ`Fk?=$=x7$o$377}blNX)$zfnaG!+7*HV zljz);hYqsV-Sp0Y73M298W6JV3CA6tPMZLYB?P@wROKa%<_QPekd@kL>^7tb<+pRw z*j2f)>)*6o4LnCky+lwj=5gMYV!y0g1ysh1loWO+!uib=NHX68Odh-s%FvgkpqSZa zq#SgLI{GC&4-i^IyQzrT8)Tr8OM|$2SaejB= zXwOw5}l4NxSym*SR6hzRwL3|oea#>dUCNWmTRjnd}j!R>20yg@Ti^in^ z6N(dN?zG$v{F9JMJJ`2kL&fV)oa4Rh8Y5|u!$7=|N==M7VcCco3gxO}A@o{t{4$ye z8YmIyorbVL&Acxr@IEI!A5w(`x-1+Q(pj&aFYjMTMi!`N#^ix&aJ0@ zwqCKREsR@|CcI3-RNaU4-r2uY{IB?!jb3$&lQ?Q%1gbN%LMC#A(B4f5NN;&wa?VuV zoPe_;fNs_}aU)-{Ngda0n@c}p?AH&P=E-+xVBb!!&uR`}8OVYKT=i*sXg$PGc_(EJ z@{=ZW1{A2gH!7sgsj$NAiG%XsLG<9Z`lrWqr9yMs3&YAknSAM!`*V)-7JMqPaP*{u^ZCd^m1W(hWgK4r(Ma@jjaq8&b>k~1^eyOc`99`=W&06}c6^2? 
z3PG@y2)P*1PiHG8rpZE3!@Zj3!YiPnIl1xzAqNqoML-({YAP*Ix3-{H%8*KHLS8P? zW*CLtMF=n?RH!lsgl()1TmHHO?63>9W>7OFk$nPQEJcFP5KuVm7souN`s)>PE`6v( zZTK}nldZ3vvAogqFksvlijZIfLRcMbF)07w(}6t1{sLhFtmQ1PO5J67#AF*GRz}88 zTmK=uwq>s?`we;p+x94Ur2vsv(`p8@&&cA?t?B}o_~@#~BWEu_5jKTI?%k>l#N6MP zD&8Tnc;2G0NNNbQMGeg?fx7zTZdJAjgQ|15ZECxlRXYK%p9bEcQO2r}Zo_18ZYQ2= z9eCpIrX@ZhNf+>1_BvKC$F+#K4t_xAneu?KFibj4uvncK_?2^N>^#U`pYDQqL%8~{ z5qk=Iy2VEGtP6I{Xtl%gZDf>UM+*+~ogoLnHz{^iJ4XK|1e}I{Xe?3yt0gPbv+Wa44FhSR;*??XI zv}*S<=-ZA@F5%5Gk=KFzEg(4;g;QL(gQ8GJ8xwDzA3UCg98%Yd7Tj+aeC!Pivn13h zl{vuT==u0|!G_*>f3J6-0)PhhjGyC);>phtRb$?S1`2;`{EJ1e#|-0lX<^@`>hu^1 zpPi|x?tkv+9hx&yiiqm3#r0YdPAHeDGH39Cm=P1B598qj$s_)N8roN#Yhtd-uZlhI zXBRK%PR?*J&zs@MCTY@n21)V1cIQzjsxkv6vO_}&g(u$;I`xdl{tx$#sQn7M zEsX7+)v2b#a{wz;j<=t-F6LI2A*!E56g_s1eGp;|$Q4X(r91IiP9Zk_JHM#>;wwE6 zaz0(L^~H+6MKU6NUfXMcM&>G3o%zEuk7U`70Z0K$lK96BC@3njkOm}_Sou_!XT~%E zt!1yo`#9&MSdp=m^!qi`CDzNG_B3a)-l6v58_{I1K^%0*=W4 z>7>Xi5jHeZ_)5LDYyGUh#V$*Bg0dX6N<#sqna>&USk(&o180IyX#a^107|9ASjCWFxG0b}ly>_jU7(R+AeYOnq$2jR>(+Luv6h}Av^}xA!~pdIC}FgM z3GaK$-j1$+@b-_`I|u<+VM({72gFmPiQbW;gn(;sn^j&2Cdd&E`8YJ+5lHmi0Xg2D zRSFU|NRg;V?(e_2|Hr|(=!6`5E2a5fbf+`T3SjLPychOyz}vcTza0L#cA)|o=1VfHi))(@adazJXJUkZy&5E`BG5{dm(y#Cf{YV#GBo*nO2mfoQ2(AvQ>WDjb_$7LpG=>r;* zV{feYV)VilKoUTVzeB=F4uFD~bcywJIAWw!*31bAnI*J0;=RtbikHv2F09xjc5IC` z^RuJQh)S^XKodov(X%-7c>NveL0wX;B_^_90!PI(t%0CY>ZAcppg{;YGbVwxFv0{) zasrYOqQBTetFWz?^&o)`P(tLWzmap#w130!9@()?tgz+bt+ttT+!cbiDK8_Vi18Po zebZNa?NV3FdO-zH(7LYeT;1ww`%YFT004;e9Z+;0lmtu}5mc}DxXT_eKc)$EC148g z$rH?D>N1)ihlDj**M$V7pdVmgd@zBSpo!tFz0c`-?iK(~4B2YWuk zy-8V|9hS>KqdLl!70O&)Ck3!>6+MKwbui2N$L+jnuBkoXc{fqY$wUe3d+iJJ=i`L; z6ffNM;WiMGAdg4>Lj9-f!UUK58|+(zP79vwMT|!I1j5ar$=t-dk5}Cd+BRs`eC`|g zorOPMUmFM>x6cdP+WgzV@gIu-P7xIS?HL9hrCh-5xZ`tCsq5k$XA(oawdd zyk6096k&nOxH{)UngCQ8(2N$Zk&?wWjyB3jn)H_+tX453rKhu%z<2&CSB{+p-`HZk z`FlyZZkvSt(+me$A-ue|OJQSxjUpm_K)(|sq4p=KN{<-=WE3T9)c8y`I>gWo7`A&m z8L${(c$fqXMXG*+U-kjqQN)%)9i|4YS8Xq#sqahc&j7_T 
zxPG-uNmLQ@1tmfXT0tgNRHAj1w#wo`njgyW_L{4=55uRmv6KmU+2q53Jx*$|acs-! zPz37->KfQaT_DXS4t@L%jZS<*yAYvqhn?RKycex#n(-7i){Z+N-$W|6EnJo&emt(hE#|@}DHT~%3G43Wq zago*S#;p2U1K6x2_TI$U@wFuvNmU1#wuVE%F?ItLaeK3=-?!nd`K|agFJDP$Fn`>b zm%?5C0y#Iyafb%^6D< zNQa(@KyqooLCqO{XGv-aA)8*yXY1)%;IHq8Ah$1k=S(@N) zbX+6x@gEO(NnCQXFibQ`k1&7J1z{jeY-?mBlex700=W`yr5ZEi$EUqrfvlx$^u#Wq z5gT39q748i>)f-2-REt1%b2wCb#;VJeA=tiTNoyG@od}2VI_v730su*bRTf`q1&b% ztRp781)<#AWdb}vDa*gA>Nry=qbPsBxBcO>B9KofD}~LBom%6l++@sqCggw~nzpef z8jCAQGs7zROqu{gDOYK`V+w{)2Sd29!aS*X+z`;#Yk{%_1i@Azsqe}K78v7#DnMwB z@x;nfuU#l-^8o%`q1^ccIJ5F7_k>c=4JuC$sG}S1DoE7dsQ&o~->;#yG5O|s*e#F` z&}a>G)PE#E%1i}i}@}zL1SXDj{SB=Man)nc9i??tDD3=AIy93~Jw+~81uxcNboX!4DKR4>uPpaw~-kp!I~ zP>x{`H(%u zT5>iI>o`wxXk|&B=QW_M;=s_?-`<{;rIahSE$jz#E!I9FR`3hF!357uWLE&!nAJ~o z#|%gY@MALGgVwEUi_p)T0WKLviD>O&RnY0-CG;wNrjy1K=@1{4l1_kJk26yirdqDP zb7F;=!6E4RZWE)t0+R&v{HNiq$#3YumOGV$oZ)&S&|f?1Zzp$I0z3iiep~lIIx02l ztv{KX8X7mwH3s*9o|f^I%8W>d`b3M`S?Ob3H}w2#p)$E3nVdmcyaofIrBtW#roQFJ zax2%Q(|%?q`)uyXYM7>#g_8#8blfp4c#s^o8C^z#)A5BNJ^+GUPkxI+wXo91xnRRA z9}K%D4gVhIlA}{Yfd$74O*$=s_BLu1CDOH#6&6OSDx*&%0xT-WFdK!Uq1QRyFZ4<| z-Qn6sR%if!2WzQe|FTD67`~~oI9BkMwN%5c^zos79Omk=mJr}m{R-&?kNVTuK$H_w zFdK1WZ@A=e*!wK6eE+gaBNshk#*4}NM}H_*VlCv6SPz#UkpI9f9)%arQb!MUS~6;L z!GVq-)>1jzL3ox$KM4pR7IH5^Ip7TEjdBwL`NuOv3y#wSmp-P)_`AcM&Pj|Ju%d91 zPK_rJ6KVAHC+>dBuf^+ZlA4u*K5;r%CWN+qrzlI`^7sHD%7wN!{+QE8L-Br87DO{7y6J*ZJ?-s4`96}Qh=o9b!3rl&t0 z`IM(%e2w!Vn5M#%RGxQZEd_52)9lQ-hg(sN-J6sQ?(71G1oHLhg4Z;`=jueQlO%^P zthk0_<6KI{Z;yAQl=6H!uy(ToN(>rw+`o9V0h#+<$*uIJ%En99N>rHoo`E|tK#mJ6VSWK`Y$xlpkx83K{5Q;d)ui;W=gP z#q>JYq`BYEpCWIcR9Ws6R>1058dcTU*%D>6x#pkO#; z%a)SO0yrgsjZ>bFm^ff5TiFk@4-;nIn3$(mDdvhA(EyJOI2Sg^!sCOwa(@eJOF(E) zG{P$FAF|&SWV(*>(7?Im&CNw3%KS1uUIyL_e2STja76HFkKCVg+0U!r9i~%4m6?DK zzDjaX(*mNaD=ox<`sZX6!ufz+*~k1%Nl{ntG#RU(cKl_fuJ^P6+S0e%JcW`S`)D~6 zjUy0V|BB}|+ZaT8rq#oX!7v}Y9MucX;3#XCBC++;Hlc=a)W@Xhz}{5%d(9X)w2*#8 zCMC-A3(HC=D2Py@+%_~?hL_*bDN1tnhU1jy`K+Z7A;!q%hEMXtSNItkj8p^>C+$xg-ou6NZANkiCHC+>LhOW4twe6v+Ue)aQ1)>r&M}o{u=pg 
zQun;v__>VZXtwfR-MvhThTc}aV2>Bw-Kp++gNk_lF`aw#8^>88;qyb!FPe32q*Lr3 zyZ3Zp;FKY)3YW{Cg>#kY=6{Y)+tSf($7uXqO_ai8WFZ0HU*?(Fah9|a*|HnR8?Ptd zk6E;{)DO4n=`$7KKF{mV8jFv#oo%}hF9uPsoJu#J6p1j%nDie)IK}AmcwQ=A{-FzZ zt`f@>hZkK*4o9(@8PqI9QNwh1&^2nznb_@Bo(V2Ve@za=#EN!2J%Tw3|S$s;Z za?;%jysD|+DR`wgxa!t%n{AbVwPX@g`b*swnzEkXuAKuaJ7%86t(?qV+_W7?L;=G2 zfzu@#tm4p{u()fFk{9VqHT?i{r~0_xTqsrc`F&77KFB_Y^@%t(r^dfN<3 zv@mf~TdKSGtPvbqPCp`_BJS9Na9*02hs&qAJGHQ)#AJR@O_Ir#cKe38S#^fj+!4-n z-z6ycI{c$nS;mV*{m$A6FS<~hhwic}LarZleT~F6O?UUK=#gAC{g)Q0CUyH(E>hiz zopRi&3qZ=VCT+xdq_juA5TLaWF;BSlWRikmC!=379_Oh)_U3*nzA*XGWLv1}+X!)} z*XY~Oq(yr!5aa|(^Hh^fZEntFrDwA0de6FX+EbyDH=vHr8-A_Sbgz+pDXZ9 z*p42&GJ(&%T#-A*9gR|28?nxOt~)r($=a;hVtKySq?0IcAk}x>q*WxIYfo^#9J(Az zJu_4<=DNmllSR84p8F*8R&boj=!VzX{hhv%Eis9e6xC)6?tJTVRjDB(B}X|d{gHdv zgsgg)*!jlkAiKJZt9<9nS@hr;+Ck=k{3PEBuy#MbS345|^$xXFbm+k;CBZSO<$=_g z>;#tudBG*BAP1FrAx|j9f)$;;L@;s=K1m!LeTQDTK0_Yk1Q6j2iCg2BD?rcw#%EBl z%vvh||868w{6;&|Oq_!GRbKWLARShzAO6Y$9=R7#cDbcM))_VZ=H{03`6OltK*SV& z*_S$6ChB11M*BNU)ZR#bvhql&-D@yVPM|+^G+)#qJi%7J4;&0j%IUwjk+EQldg6D9 zI+3=`YC@d6BA3j58u~N1=3-){zl5FfS+DtiZJ&z4j_@!i%h5IX_X&*4Uce zF>A)0w$a_&7BD!4ulyBJg;OoH6YXEP?9<6{c)PNYH!+w;a-&I?PepXDgfSc@R@^!d zcBzrFGP5u)3*_FFh1c_+CrX!Y4jKK74?#pH^1Lb}$b;Z}X1yzVfrI>>CyD2L-*XFA zL#s5FF7H^E<0X@8QvsC0=b#%NS>JXDG?tFqmOqn>t&TWGwD+Wqp6t5Ds=$<*JP?H( z8ylw9D*~!v^<%AaSK+_@ek%39e+zB#KgkYT+K}%Mw6;Nb7T|x{+W)WhJ`x^r2h_{# zD1O;bNJ+7TlpOfvqc?&h&-tb_A^W?^p@IMCA#D^4n61?huC z6jJ}|DoE?B@RZfRpa1@{|Cgcv_d7x>{tvU^;>viB$Of?Ph-WK#y;8#xB95nl%<%6g wLJHI~|M?h#Ga~=_?2-Z}6aW4EfBNMOD$ie3(iP{mgsa4#F*}|6r{iD$2g4!^@&Et; literal 11218 zcmb`tWmKD8&^8JQ?(W4Minc(D(*Qw=yIY~SThUP3;;w~4@#0n>c#1oPLU0IPpo9R$ zihX(B_uu#9th3HKYu)key=TwNzUG>lJ61$Bm@6FFf#>67%?zdG1Nec20{79UqUi03^P#?hL#mR0@UK7>CtqJ z+?4hST|2-;f*5iv#od`G1l~2w2M3otRrgcFwaGPJ`e|)Rqy#BBR%#JB7CAk)NDLmm zgPo0-=+NR$yy)ma&fk(h`jrn4ixp}W`pLf4hem6M>%A>57wU&{0c*ocPAAPQUwPw- z|9`x`XM#Ghb*Pkci3DYXL>0tR!4v8gzkbCH+SyJQ$eKg8#+(*fO*{VSV53*1VACMA zk^u&hdf_x+Ylk>cMQ0 
z=jI4&GY(7N|CwdhA&a?73flrV|K#E51%m!7R{p>P!LBHx+-P(`&;Wwo-81qw_^BZ% z*bPSxPX>Cn)WUUjzX9Cv$|D6>W7JvQ{8?5yDsv|?WJT*@a`y{t185#uVl3Agunj$_ z#}2?0hpb4uuh|QB#MJS|&2s*?9>xRX2xdi1Xu$EkI}M*++A)oSF4Y@=H;o93>93rh zW5EAiIQ^=qSD?3XsX<~S>i?sA{-veKL zfRc`>vZbF9ga6J)t5U}l^Lish5W@zSaiJfsrw^08A|y3&uBg8eYUpMJd68!hm)KtJ zy&lLl?&vjN=2s@vCFJC6Uk{~_B9p?)!PQi)+ZM8+O!Hn|SXk3{$xDfwRdf*l!Gu^d zlxSx$*=e*RIB=ok^y9nbshsx%0?}IdVxz!9 zUIZ1C6xvEskGUvlu4FP&iL-T6Lnqg+nIj@C=TI#646aV>6^#NsznIE9&hcM))R z0n;gZdZJq=SqQKq^#cwA;w!gYj+)B!_SzyCNLr7XBnBTUV^j5f7RL9f|5;0P>b*S+ z3IJFGjsE&y5lQez252&R+Ehp3_Vqgp1P`osu2Kd=ojY^+-q<&)R>LYczo}XN59o{x;pD;2 z=xP8-;8@7B#^@?p^Wh!Ca+33Umw9qRN5}vDSt0vEULolnQ;=TYx-CMz>`4O$%uvYvkgw?W;E1Q@)5ed!eSXWHqV!W(y2KnaPXB* zjBm7Sjp3Ux!9CQOi%mLsu-hl)(! zBmI`>%Kz$_d4fngi@R|BPXddsa0b3jFV>I~!V$uaWCtQo33k z8O|mWfOvW_-#ZoJl0F z`|2CZHP9!b08^r+`WaY)Qcy)OMSWF7#9(orKbXnW<@Dxo_h$g1PEso`S%u&phvQP} zNZ$qf&gNBWB}UEE#hx*s8%AlHsa7ZX_|8FuVZ2(2Y3rHs+eW5-*we5z*(dEONSq(9 zQd0KX*cU2|1aCze`10@zaF`X@oeNwY8pa%>p6T6R6LCt&C&V{m)l4}?;ii-}JSXRG zqtZ*-b|@`7*fgjYk{*M*aYI@7a?gY4ZdPAxVBQh?xQ9B;95v@6v=Bp(BBg5aFT&+b z3nk!?cj3_i#Bb6^w^p1q1%z_5)VL`)3lCZN*YAhd4wRfZ)swYq2@vi@T=BoPY$skh zvJ7)LZbP0;e`sa6Ci2SmWJy9Fym;7fp5Mc%p93GI{ zIjgQ{Vez{nS!;E$@`3;)b5e)X14EvM@AQ#`!S{VeBob28C(FG5n!V_YF$N)nloL4= z``REUL|=GGQ-tvPX(sj2+Q{=5vy8Bji@gZdm>)OS2TOfT*;DFMVE^Ov{;cX*ywwg7iNWN%uvzuLbxX_5(uX~IbbRU$Z9&X3h zpFo8;P4hmHm(BaUoT=F?ncE}<(7tPm_&uX>`;fBSQsF0Ouk@wUu0xA57vL>^*E=|! 
z;Ym{gc+5JQ>}~%w#@9E0c{Jcna~SPy04p`w$;Y!ObpQRCDw74yZo)3SwH}iRJ$^oLZaKQ- zoq2U(H?o_v4YR?tn{!zwZ0_FJU?1x;f5Mb6yBV|3z_`{z+lF%HRJ*LB1zuOcBA)W~ zqc$*NZDGwf0Bue#^x5KJjXz>cWsMHJ@V^Rmy?c4|=fgOhs$LblUYCN#E3nO7fc&WWrC0z6&Y z$ezHkN4_q?_t7zD53^0TT|$pa5Ub||P8Iq#p72NQlfSKqJAH_GbMDuIz6fgX^bCzE zwFvWeBQafSJXqN=4eJ!z#uR22 z4RujWQZLNpnpNQxagcYc*m1$V8DatC^)E`)RWy;CaH~5prpDVXe7n>?-eH zM~G1{)8UH!qv-Qo%`fsHDyMU_z26$x0~Ow>`^aZg!+c@1GP|pQ{NrZ#>e$!JU@WHe zqseK1T5J0E=W5p4u1pb;q!?0K>J^!9QoU;0zmK|sMVZ0a2 zLy_|BZlylI+bcIv*l*+atPv$GwjM?S7BlVxBR$i{R9YKylp`mWR6>_Cf)`Oou<@91 zkDG^WpzX&nerzzMB20E-7k5(8Lr{1WsS0@nrELbVEqHOG)E&y(qrHykXvwE2!ZDn$ zF9xnTWoX&_xhd+77$2l@+nKHL*gE)_=erufJcz4i#77bHbdfCIGex!wh$>4)jHKuR^* zM(~d*Tr^M!Uroq~w}V1(4cyL2f>Ki%UO1(4#=)L9jx=pe3`3_H~A-U z2CU@%$A6f2lwKb^M~Nk4)()D!?6aOe<_Wlk6Sau1s*9PQK>Pk~j7E*o)F=eJ4&RoG%IbejQuRTp&cTkFlla`6 zKUn!?k`ynQCt8sKXpU`+v2nFpR!PqFddH7*!!|;lgGlyVroZ=ggHwS$mo-%@H76~t z_khY$zXM!GY^kqTT3%`sXFE*=JeaFad8som^)xzv0gyePj@Z3iisSvN-t}UDt)9T1 zjfjZ4)lvKhfdhF5jT)$NNC^Uk0XBI1I#aQiKBIpOcq9JfDrGgwEMF5yfE6S;zhG&$ zcFy4+-b9Is@aXalf*(a)A#A>Y19xRT)V2ROH!f{p!n&+|HB z@YQg`YQm%0T&h2lJip3%YZ)Zn!?@P%8WkU(mPbL|Zv5K66P#WziE2v-= z;nu}ZZ-P=ZQKl7xz7|9)O74O2k7wI>FJ%gti)~H0Z-mFWliXPnP6~dR<^6;>d4iYs1qXYQl?OpL2fy} zZ2cCq3r?Zg)m5(mmk%nP4z!)Ul@H$w;)oulb_UT{ySBV@uyBp7+A}oi`)f_VtHiIZ z84p`HqUC{?q6n8``?1QLzX6iYE?A*9 z(^(Zt*eFXiHSgUIP}`#Pn?d$vl|(zACRCSPJn-k;y|wv977uR2!$WmiQH~vn7lmu$ zo+@~+AQx!bkXk%V`WQ2qv2|FnGtWt`+Qv=kM`;B~UpcE2%$~`PhJ7C{XV|}Z^8Gzp z9V#`TK20kIQ?H9PNwrXJ18=^yC+ZN^7xMFto0NRsUNXKYM(q{d^QU44knfP5U!IF< zVYZctTds1rNXoKa3C%yw9Cch97GK#tc(wU#IT}=r64Zu<`dXb*4t^Y=mh<>$~VuI8O2i8Er{_V`g7R(}mhTNuRm`|FXqm z0tcVX&ae5PGxkILO(KqQk?`hKm9gC3S>Mht13XIHnmPY|fQKTeg7^$*RYJWpUrPW} z{iGAf>n~QPBuT3|bkJ+!QGN~qq+PI3W0&qgrxX4d66X&~DthTBsJmW&WCk+Hi5Ng- z#nkSkq7K8B>#Gf`tJ9ZS!-FY$EjS{TSKz|hS{ZJCp@IzjwYINg+&8ufwv?lMov!)6 zmt8n0cV@My26-^=ErVXunaY`{vfDOS8l^>$T^kX;amGEbTyF{#^u=e%Co?yIXtG-T9v@JD4fJ1&bC9xc)usIGmlw^x;~V(haNAINZY$TiJ^=CP3kT1kSgtr 
zSVzYh*azo#sgqcvXwEE?hwlM8G%-9&t{;df4-+bwtjMof*tG6=^YkIhbO(xgh07K& zikXZC_p}BinslQX7Y!Od=V6|Bwv$l!$oyxgS|&Pv&C&vv+eY{MGpfy}3MbFIEj;Cf zMr+@o9pVO$F#(n(>y<|6y%|N(w z>sNf)uNQ#F;;$uh-xYkigV97c8JNAmWf=dHw?=I!XUZ>`4Eex<6}lcqu%y|5jW79{ zeW|7STRqi?BpFM`v;%h{Z?lHQaKCk~>*3 zL^rZoAP?V&gYA1sFxM|~*IiZyx7Ix>gO;CbNryM7N0Sxk@OD@k^wV-!F21$a`aekH z9~C1Go|;xR3m>L2WKB;WhSX!S)cZbSJbp_zlfjXRjwlgGQ64$-v<@FT^qbUkILgLbD zWe{BgYpU<$@t_g=DpaEud){abfi^G4rSIA;rw_CdT&puq9Sx!rD2&jU>kJ9ZE(+X0 zRw1a?)&;D0|K0f`q8W)8YqL)B-K19M7`%OecA^>+%?YaixvCJC(i>lLE#ztMzB{L7 zor$!kK9||jxA2vp1LT&XgDMU>)ukH9oQMBeLJd&yn{N#-^6<4V!oj!Z9qW;TTOUtv7RHSb86^Bq*WW`+jmVM%FO~S~1N~j5 zlN)=T$WiV)fih?-)?+?If`lC-Zz>+A&w1RLTYnUYsdr=D*$meq3gnSOU(U8@skI73 z6W>>8spW#~{;p@XF4-M4uDx%=9vxWzF$=DBSZH;MGh!zpEtyq8(<4T-e%?CFAZ5Qh zXC2X8$tQxC!%}}+eV&imoe}M>Tx+?ZXdrK#UCtW)GjqPp{8gXo++^$`5kH0VN7;67 zQ1~KT22rT0k`{P=03_s8S)Wpzxhl^tU8S z0`EWg9~i$fD^Ej4euVf7p8S)^YmlTDP%%}T82%xwN&PHzZh$Ayo`r(XJU-s;6 z#z_CuHKGV=FLahWp>SyXCGu~nRLmJopGX>I(~qzMW#s%J*}}fN0VyBS;0Vb1k5BW= zx%zs6ApW`$)|=NVc2H@=U1A6vrlZ%3VJA#c4EY(R{zN9`Ip~9{zQQW2^0!C1cKgSS zu{Y8ZK;gim;U}u-(@j=vusz_2$V8_jyKpBvh)}V>`hcwsGsmmcn0xpam4Sslk%8ID z0sk*mGEA=I)l+Tts&iBj(m~VUNc4Z>wuYmTH=APJ=HyDqSLD!W5?<^Q0+4P-6&k&n zs_IEsMMVs9@MPt~vA%b9Zj&G^VZ8&v$M#f~KwYXb{yLYFeg05OKvF*Mn-xp8{@ZWs z#cfRYF@riFfOg-d6CXw!MmD(+Oa#*Xeirveb3kA6Bl_@N%g5Ke6JVI!N^=T$pfDB5H|N zaDEp0+`R|oTYfa*$OI(8*~aX9Whjg@PBjtl!9&3Wu{G~-4%W^@P!<*f$N#>fXP_zr zXr&6O#Hsegb_YwCr^=L7qf>g9R-o4yCpJ;Q=ggx2H!U7NxzFHl0}S?>1ZwMNw%skQ z>q+n9ilafd5MFO598}@8Y_%t--Nsx>8g2=1!) 
zTk}e&K)NrV7_yF)_L+5(9N5?Ls=xoQ)qx%zzi7%jL>D=O^Ox)E+uEnChP=Dj38Kz6 z3}mcVeE3w6iZ(OGm7ivS+(T z;vrnHYKSs&Q9L!fGn!~>WzBm5pw0HYP(sz8*n+%Y8Kw(M)f&3_(=C4>nwRYbOc8aI zMLk2|p;B4`R5<|w7(+RnUaTmFQq~=f)P1g4i+W3##8=WF5)(>gQScQJ3tm1}7k%(P z2}r#q;R)~~~`}D>cvtAH~08n%BR6MYwZ+=ycNAfO##9P_uc@ z-vj^EQ{Dp&DfEzYGJaRq#!S zEr6?TwNL5n%P~EH9gyu5S}wto1w=JWb}}eww;tvlHaa_J9^Hk0MQ{PphYZE)}&S7 z5?&|{v~pc>jxTWm=)?-V-6~gW6P{+v>Nkxs{3XZ?l>1E+bJR!YD|2ybZ5P-Ipr$Awq%%574w?gtE{E9cXZfjXoerVDes>XZI295zI*K&vh%%o-Z|`( z^TYd9mjDbNbJs$m=JdR>aH?yW3v<(+KNwTZ@b#0QdZ#RB&&Y_7^=ASMtQ! zOUvNMn%ujebd>NXm*fM3R4r$b-C_kRQNAri_7ipvlNC#2K&1}JMA2bP`J$UKpu_!< z+&&IbqUatOf)-RSqhQe}AAq#uV&_f+V45=Bb>taptLy=cZy&<#HAxy}%x;)UBahY; z7EbBx9WmrV0G8#a`FzFYY^k~hR9G(a17%!B4tjuKw=7mMr?esw;2%%s}i=0 z(V#O~qys@e_7I)*;Em|AWMv)+(@MFocFXU7E5RP`$Hf`-hIuQ^3>>5!m?p|i ztq9n93q|BEx6zFEUVarVLof|LDm zahI8s-{<1(PH}vK8J|J@4!S(lxQPo+JPy(TR^U*-8zQ$^7qGx{KF zvwiJlD@i8k7x_sWSiJ4NAx52P@gBJ9F~mEu2orz!bf2a z;datrsfy4FdZku~WUz;10%$~5{8E1Ho{x>PLOM}2kUji%JmqNXHhk|i(DGi~Yw6l) z@;>B?=w2AiGqPGH>4MpQqU?F!n+--Ki4<@*HSB4IT82OJTj0x^P#(bu`mH;4JhOv7 zes&Z7FPlXH5pQz_)sC$u`%- zj7?mPjio$BiC z$Bj%I;K(NUdNk+FzmI>0fBX_-bIt?=48`d+I z@v9q^z2&Kk{VdRXRl04cy<|#pl|+t7**DX&8oc?K@S%rRD>WaY$y_CRxc7IHpwPiw zQun%QUsSv9?+0^^Gt1lGsy><$|J>YakAci6nN|w&T0PcbG+KDYs`ND^=LPy5ZY;?csl*gYrH><)<>&!zzNA3gm)T}0Yi+vnP+v=$Z2E{G^oiZwW$$tQiFG0Y4gLeiQY>pT8DB#PA_L`8(9ieEpDuVCQ94|enWD*VXF6c$9nHvRP z51Yk{I&z3b{u2S>i$G`Td?ocR9o|r~{|wFaT&AOJ70S}4&z@1p_JJp!FwF3OzqMDL z6~o>t_M1{F4o>Qu9{m1`u9;%Au6so}ekdT!aT22psk zeXzwrg8d7-YUy#9$A)(opp5G^Fq zop$37r?HyL$iv`FeXGC{z6LVQprL3-N2z>uUxLX=j0f_F-Ct_W7qM~m<4=>YPeRud z4Z+mE9lgnlHN)K#Zr)}N9_ViA5jY0qK5A@6Sa+A)WdxB|{ukyTbNdGaBCpH5L*Mhf zoUQURf}?}V@|+)M!es}8I;W5;WVhxk=RM;4(e0Z!gKHI5lCa~)%+m-q@$Qja=cxg14I6nS7_w-^~vYRwW03C1Q z91x#e_I5{kQeWEd#*9yN&q{5X#|sJ7NpW%NPLcIzh+qWYgvUgmcqrmzQZ>O|vhW>y zF+KQaWn6BraBx&L>}T25S~?{J1NCB1l+T3QxOS7S>cln@X;?0D@z$e)u!?P5a~}oR zPa39Qf(1gNd0%r5p|J5ON{VJ&T&Z*`?rRKrHN1{Slhr*^a!Tc^vUE< zjyc)UtvmR~;KEOn?{nq}qWYH4qZ;=G80;msX>nwyP1S3QV|b+sW5hP1UvW8tcKbd< 
zA_S)DG1@}lzxj&diu6rl9>r1ex&*T>7UUC#@MUdt*JNC)DBiRPB?5tuVIQ4zUf4Do zRi^7Ei4IMW2CsK`W8yH%O3Dw>l|Uq(fHMdN7CH_Rd1JxUuR~r4473u@tHr03cd?+| z-u1WyzC0fJD)l1m_7!t^TIOl={)2Prf~vr&p@;aXXH@w>%r`1<&US!aFv%crp$gt6 zu%l(;AH4%EQeg6D1jQEJKBK)BX=9jjf~Z`oelMQ)NcHy% zw|(+V0KPuguQn?!lR1Dh(BpXf-QBIV{NjLJUaccG3TUtSTTgK6$qJ)gwtA|IHhar< zC>YR2Z$DLAfg3N%Ei#nvlF&|j@YW>$(`oX68Nwl6!y|r6(Ewe-(F0F^(pdefPuUi; ze$bBn1>7cD?oyf{ii2h;rFHsrfuNkKZtopUgsJ_+qyMpaKPruECF>Y_`(0pkY#l+$ ztb)A6P*+(Nmu*6>N5YFyI^QQKX->VfCqZ4G;LA^7V`NR;#6jrvAm}-^Q-T_UeHMb{ zTL}on`M$BG)h5AIWB2-^k%=&Lf$rSa)m=F-E99#dNF(?AdvS<bV7ddlE6$LX+Vhxz$8o zh)CTbRk2?mFLN0&x?bP`YI+;yRO$s$ru$74E8g)S7^QkU&${{vCNuBRB>+fVD+0jhe0C zc9Az)*u6LJf039oTJ}t3r&-YTScUWaG6cj(-$D>n(sEgcH(fM0&4768dYruc6igW$ zy@%^cm6FEe*#Q5y7x)LgH)&J=;v}ZrR3HEP)#4WmjYIuNff#Kr+cHcuyK?DY|Cl(_ z4z)fT_k{3xN!zvdPMLL9sSO)m?wZHk78T$tuy~1jIpCUoG~6I{J$8a*16UN`YYg;M zw`7B-J_s?%MR?+P&|{YLdPR!JnhKpt8jUc}3O}?`t33|pl$s_ask33yK-q4zQLTzo z7RmaXE`nT@dHD9To+tVpUEBFn#X<-K3eg;h4$W=iuiHLD*M9#hK zAPs`1YWikBZZjee&-3~%`olL`p7HE!a{m4M8vK0y2Al+D2(D8t+*jl@2z#1#juJun zWw1>m16d%^M?EB?CpjNhdv%xM{&auJBP%->v|FnOv`U7!&m>~=U=DM%KRHjmm1STv z^E>;?_WV~kVAZqsSaQhXy|vKfLQM;;q@fq~+jIbZa>xy!iZ>-^pU{ufmMW$tpm(W& zG2>F7^<90mC}nJ9_w-+p;@`{f5{J5ccd5WdmgGLj+|M6n^h?Buv+|good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/ampliseq v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/test_full.config b/conf/test_full.config index 9bd5e2e9..80fecde4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,8 +10,6 @@ ---------------------------------------------------------------------------------------- */ -cleanup = true - params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' 
diff --git a/docs/usage.md b/docs/usage.md index fab27df7..b796f4a3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -57,7 +57,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/ampliseq --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run nf-core/ampliseq --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -76,7 +76,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. > ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -> The above pipeline run specified with a params file in yaml format: + +The above pipeline run specified with a params file in yaml format: ```bash nextflow run nf-core/ampliseq -profile docker -params-file params.yaml @@ -88,7 +89,6 @@ with `params.yaml` containing: input: './samplesheet.csv' outdir: './results/' genome: 'GRCh37' -input: 'data' <...> ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 9b34804d..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,530 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
-// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 
'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = 
removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that 
particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! 
Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = 
params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! 
Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } 
- // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... - // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - 
ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index a576b038..1afc92f7 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -11,6 +11,7 @@ class WorkflowAmpliseq { // Check and validate parameters // public static void initialise(params, log) { + genomeExistsError(params, log) @@ -46,15 +47,57 @@ class WorkflowAmpliseq { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO 
Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "

  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 88093eb2..57d229b9 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -20,40 +20,11 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -62,14 +33,6 @@ class WorkflowMain { 
System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/main.nf b/main.nf index ab10ddb1..ffe59000 100644 --- a/main.nf +++ b/main.nf @@ -25,6 +25,22 @@ params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/nextflow.config b/nextflow.config index 9c823f7d..743b4799 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,12 +12,12 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options input = null - - // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false + + // MultiQC options multiqc_config = null multiqc_title = null @@ -27,7 +27,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -36,19 +35,15 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = 
false - schema_ignore_params = 'genomes' - // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -56,6 +51,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -75,13 +77,11 @@ try { // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/ampliseq profiles: ${params.custom_config_base}/pipeline/ampliseq.config") // } - - profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' - cleanup = false + cleanup = false } conda { conda.enabled = true @@ -104,7 +104,6 @@ profiles { } docker { docker.enabled = true - docker.registry = 'quay.io' docker.userEmulation = true conda.enabled = false singularity.enabled = false @@ -128,7 +127,6 @@ profiles { } podman { podman.enabled = true - podman.registry = 'quay.io' conda.enabled = false docker.enabled = false singularity.enabled = false @@ -172,6 +170,18 @@ profiles { test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of 
pipeline parameters and creation of an input channel from a sample sheet +} // Load igenomes.config if required if (!params.igenomes_ignore) { @@ -179,8 +189,6 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } - - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -198,19 +206,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -219,8 +227,8 @@ manifest { homePage = 'https://github.com/nf-core/ampliseq' description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '2.6.0dev' + nextflowVersion = '!>=23.04.0' + version = '2.7.0dev' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index ff5d22f4..bc005e36 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,9 @@ "input": { "type": 
"string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/ampliseq/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -57,6 +57,7 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", @@ -157,7 +158,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" } @@ -174,12 +175,14 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "publish_dir_mode": { @@ -203,6 +206,7 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", + "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -217,6 +221,7 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", + "default": false, "hidden": true }, "hook_url": { @@ -228,6 +233,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -243,13 +249,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -257,12 +256,29 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", + "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
+ }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "default": false, + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "default": false, + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 2bd4928f..de25864e 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -1,21 +1,19 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' -// Validate input parameters -WorkflowAmpliseq.initialise(params, log) +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) -// TODO nf-core: Add all file path parameters for the pipeline to the list below -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input 
samplesheet not specified!' } +WorkflowAmpliseq.initialise(params, log) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -69,9 +67,12 @@ workflow AMPLISEQ { // SUBWORKFLOW: Read in samplesheet, validate and stage input files // INPUT_CHECK ( - ch_input + file(params.input) ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") + // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + // ! There is currently no tooling to help you write a sample sheet schema // // MODULE: Run FastQC @@ -91,7 +92,7 @@ workflow AMPLISEQ { workflow_summary = WorkflowAmpliseq.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowAmpliseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + methods_description = WorkflowAmpliseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() From bfc37c1b3ecd79d55da4822c331c4bccd16888cf Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 3 Jul 2023 11:20:08 +0200 Subject: [PATCH 060/230] Update requirements for sbdiexport --- lib/WorkflowAmpliseq.groovy | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 4e32805e..78c5b6b6 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -64,14 +64,16 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--sbdiexport` expects specific taxonomics ranks (default) and therefore excludes modifying those using `--dada_assign_taxlevels`.") } - if (params.skip_dada_addspecies && params.sbdiexport && !params.sintax_ref_taxonomy) { - 
Nextflow.error("Incompatible parameters: `--sbdiexport` expects species annotation and therefore excludes `--skip_dada_addspecies`.") - } - if (params.skip_taxonomy && params.sbdiexport) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore excludes `--skip_taxonomy`.") } + if (params.skip_dada_taxonomy && params.sbdiexport) { + if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) { + Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.") + } + } + if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. Please specify primers or do not use `--qiime_ref_taxonomy`.") } @@ -88,9 +90,15 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--filter_ssu` cannot be used with `--skip_barrnap` because filtering for SSU's depends on barrnap.") } - String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] - if (params.sbdiexport && (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.dada_ref_taxonomy.toString().equals(entry)) || (params.sintax_ref_taxonomy && !Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) )) { - Nextflow.error("Incompatible parameters: `--sbdiexport` does not work with the chosen database of 
`--dada_ref_taxonomy`/`--sintax_ref_taxonomy` because the expected taxonomic levels do not match.") + String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] + if (params.sbdiexport){ + if (params.sintax_ref_taxonomy ) { + if (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) ) { + Nextflow.error("Incompatible parameters: `--sbdiexport` does not work with the chosen database of `--sintax_ref_taxonomy` because the expected taxonomic levels do not match.") + } + } else if (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.dada_ref_taxonomy.toString().equals(entry)) ) { + Nextflow.error("Incompatible parameters: `--sbdiexport` does not work with the chosen database of `--dada_ref_taxonomy` because the expected taxonomic levels do not match.") + } } if (params.addsh && !params.dada_ref_databases[params.dada_ref_taxonomy]["shfile"]) { From 90e54a89560f755a8bd6a166f3860903ea6e3916 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 3 Jul 2023 11:30:54 +0200 Subject: [PATCH 061/230] Add unite-fungi 8.3 again --- lib/WorkflowAmpliseq.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 78c5b6b6..9f24a34d 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -90,7 +90,7 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--filter_ssu` cannot be used with `--skip_barrnap` because filtering for SSU's depends on barrnap.") } - String[] sbdi_compatible_databases = 
["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] + String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] if (params.sbdiexport){ if (params.sintax_ref_taxonomy ) { if (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) ) { From 33517437dd019fa444e0b5938dea8682cf2a0a52 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 3 Jul 2023 13:18:37 +0200 Subject: [PATCH 062/230] add info about DAAD2's --sample_inference --- assets/report_template.Rmd | 28 ++++++++++++++++++++++------ bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 1 + 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8dec80ca..d8d58fdb 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -37,6 +37,7 @@ params: max_ee: "" trunc_qmin: "" trunc_rmin: "" + dada_sample_inference: "" filter_ssu: "" min_len_asv: "" max_len_asv: "" @@ -391,7 +392,14 @@ cat("Finally,", n_asv, cat("The ASVs can be found in ['dada2/ASV_seqs.fasta'](../dada2/). And the corresponding", " quantification of the ASVs across samples is in", "['dada2/ASV_table.tsv'](../dada2/). 
An extensive table containing both was ", - "saved as ['dada2/DADA2_table.tsv'](../dada2/)") + "saved as ['dada2/DADA2_table.tsv'](../dada2/). ") +if ( params$dada_sample_inference == "independent" ) { + cat("ASVs were inferred for each sample independently.") +} else if ( params$dada_sample_inference == "pooled" ) { + cat("ASVs were inferred from pooled sample information.") +} else { + cat("ASVs were initally inferred for each sample independently, but re-examined with all samples (pseudo-pooled).") +} ``` ```{r, results='asis'} @@ -937,16 +945,24 @@ cat("## Barplot\n", ```{r, eval = !params$flag_skip_alpha_rarefaction, results='asis'} cat("## Alpha diversity rarefaction curves\n", - "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not becomes horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ", - "Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click [qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser.") -#TODO: highlight DADA2's pooling vs independent + "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not becomes horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ") +# warning if dada_sample_inference is independent, because alpha diversities are not expected to be accurate! 
+if ( params$dada_sample_inference == "independent") { + cat("Please note that ASVs were inferred for each sample independently, that can make alpha diversity indices a poor estimate of true diversity. ") +} +cat("Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click [qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser.") ``` ```{r, eval = !params$flag_skip_diversity_indices, results='asis'} cat("## Diversity analysis\n", "Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity). To do so, sample data is first rarefied to the minimum number of counts per sample. ", "\n### Alpha diversity indices\n", - "Alpha diversity measures the species diversity within samples. Diversity calculations are based on sub-sampled data rarefied to the minimum read count of all samples. This step calculates alpha diversity using various methods and performs pairwise comparisons of groups of samples. It is based on a phylogenetic tree of all ASV sequences. ", + "Alpha diversity measures the species diversity within samples. Diversity calculations are based on sub-sampled data rarefied to the minimum read count of all samples. ", + sep = "\n") +if ( params$dada_sample_inference == "independent") { + cat("Please note that ASVs were inferred for each sample independently, that can make alpha diversity indices a poor estimate of true diversity. ") +} +cat("This step calculates alpha diversity using various methods and performs pairwise comparisons of groups of samples. It is based on a phylogenetic tree of all ASV sequences. 
", "Folder [qiime2/diversity/alpha_diversity](../qiime2/diversity/alpha_diversity) contains the alpha-diverity data:\n", "- Shannon’s diversity index (quantitative): [qiime2/diversity/alpha_diversity/shannon_vector/index.html](../qiime2/diversity/alpha_diversity/shannon_vector/index.html)\n", "- Pielou’s Evenness: [qiime2/diversity/alpha_diversity/evenness_vector/index.html](../qiime2/diversity/alpha_diversity/evenness_vector/index.html)\n", @@ -963,7 +979,7 @@ cat("## Diversity analysis\n", "(2) Pairwise comparisons of groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each folder named '{method}_distance_matrix-{treatment}' contains an 'index.html' that allows to view the result of the statistical test for the diversity method between treatments.", sep = "\n") #TODO: adonis test is missing here! -#TODO: report rarefaction depth, note phylogenetic tree & phylogenetic placement, highlight DADA2's pooled method +#TODO: report rarefaction depth, note phylogenetic tree & phylogenetic placement ``` ```{r, eval = !params$flag_skip_ancom, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index 4aa1c420..4cfefdc1 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -33,6 +33,7 @@ option_list = list( make_option(c("--path_asv_fa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--path_dada2_tab"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada_stats_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--dada_sample_inference"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--filter_ssu"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--filter_ssu_stats"), type="character", default=NULL, help="MultiQC 
plots", metavar="character"), make_option(c("--filter_ssu_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), @@ -102,6 +103,7 @@ rmarkdown::render(opt$report, output_file = opt$output, path_asv_fa = opt$path_asv_fa, path_dada2_tab = opt$path_dada2_tab, dada_stats_path = opt$dada_stats_path, + dada_sample_inference = opt$dada_sample_inference, filter_ssu = opt$filter_ssu, filter_ssu_stats = opt$filter_ssu_stats, filter_ssu_asv = opt$filter_ssu_asv, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 9f23e919..6de07800 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -102,6 +102,7 @@ process SUMMARY_REPORT { --path_dada2_tab $dada_tab \\ --dada_stats_path $dada_stats \\ --dada_filtntrim_args $dada_filtntrim_args \\ + --dada_sample_inference $params.sample_inference \\ $dada_err \\ $barrnap \\ $single_end \\ From 8ef5fbfbdc6c3b4298092f10a017edc89608354f Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 3 Jul 2023 13:36:05 +0200 Subject: [PATCH 063/230] add rarefaction depth --- assets/report_template.Rmd | 9 ++++++--- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 4 ++-- subworkflows/local/qiime2_diversity.nf | 7 +++++++ workflows/ampliseq.nf | 2 +- 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index d8d58fdb..57987f30 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -78,6 +78,7 @@ params: pplace_taxonomy: "" qiime2_taxonomy: "" filter_stats_tsv: "" + diversity_indices_depth: "" picrust_pathways: "" --- @@ -954,10 +955,12 @@ cat("Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the ``` ```{r, eval = !params$flag_skip_diversity_indices, results='asis'} +diversity_indices_depth <- readLines(params$diversity_indices_depth) + cat("## Diversity analysis\n", - "Diversity measures summarize important sample features (alpha diversity) or 
differences between samples (beta diversity). To do so, sample data is first rarefied to the minimum number of counts per sample. ", + "Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity). Diversity calculations are based on sub-sampled data rarefied to",diversity_indices_depth, "counts. ", "\n### Alpha diversity indices\n", - "Alpha diversity measures the species diversity within samples. Diversity calculations are based on sub-sampled data rarefied to the minimum read count of all samples. ", + "Alpha diversity measures the species diversity within samples. ", sep = "\n") if ( params$dada_sample_inference == "independent") { cat("Please note that ASVs were inferred for each sample independently, that can make alpha diversity indices a poor estimate of true diversity. ") @@ -969,7 +972,7 @@ cat("This step calculates alpha diversity using various methods and performs pai "- Faith’s Phylogenetic Diversity (qualitiative, phylogenetic) [qiime2/diversity/alpha_diversity/faith_pd_vector/index.html](../qiime2/diversity/alpha_diversity/faith_pd_vector/index.html)\n", "- Observed OTUs (qualitative): [qiime2/diversity/alpha_diversity/observed_otus_vector/index.html](../qiime2/diversity/alpha_diversity/observed_otus_vector/index.html)\n", "\n### Beta diversity indices\n", - "Beta diversity measures the species community differences between samples. Diversity calculations are based on sub-sampled data rarefied to the minimum read count of all samples. This step calculates beta diversity distances using various methods and performs pairwise comparisons of groups of samples. Additionally, principle coordinates analysis (PCoA) plots are produced that can be visualized with Emperor in your default browser without the need for installation. This calculations are based on a phylogenetic tree of all ASV sequences. ", + "Beta diversity measures the species community differences between samples. 
This step calculates beta diversity distances using various methods and performs pairwise comparisons of groups of samples. Additionally, principle coordinates analysis (PCoA) plots are produced that can be visualized with Emperor in your default browser without the need for installation. This calculations are based on a phylogenetic tree of all ASV sequences. ", "Folder [qiime2/diversity/beta_diversity](../qiime2/diversity/beta_diversity) contains the beta-diverity data:\n", "(1) PCoA for four different beta diversity distances are accessible via:", "- Bray-Curtis distance (quantitative): [qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html)\n", diff --git a/bin/generate_report.R b/bin/generate_report.R index 4cfefdc1..7adf66d8 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -68,6 +68,7 @@ option_list = list( make_option(c("--flag_skip_abundance_tables"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_alpha_rarefaction"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_diversity_indices"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--diversity_indices_depth"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_skip_ancom"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--picrust_pathways"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) @@ -139,5 +140,6 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_skip_abundance_tables = opt$flag_skip_abundance_tables, flag_skip_alpha_rarefaction = opt$flag_skip_alpha_rarefaction, flag_skip_diversity_indices = opt$flag_skip_diversity_indices, 
+ diversity_indices_depth = opt$diversity_indices_depth, flag_skip_ancom = opt$flag_skip_ancom, picrust_pathways = opt$picrust_pathways)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 6de07800..9fd5c7dd 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -42,7 +42,7 @@ process SUMMARY_REPORT { path(barplot) val(abundance_tables) val(alpha_rarefaction) - val(diversity_indices) + path(diversity_indices) val(ancom) path(picrust_pathways) @@ -88,7 +88,7 @@ process SUMMARY_REPORT { qiime2 += barplot ? "" : " --flag_skip_barplot" qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" - qiime2 += diversity_indices ? "" : " --flag_skip_diversity_indices" + qiime2 += diversity_indices ? " --diversity_indices_depth $diversity_indices" : " --flag_skip_diversity_indices" qiime2 += ancom ? "" : " --flag_skip_ancom" def picrust = picrust_pathways ? "--picrust_pathways $picrust_pathways" : "" """ diff --git a/subworkflows/local/qiime2_diversity.nf b/subworkflows/local/qiime2_diversity.nf index b3d7f64b..e5645072 100644 --- a/subworkflows/local/qiime2_diversity.nf +++ b/subworkflows/local/qiime2_diversity.nf @@ -71,4 +71,11 @@ workflow QIIME2_DIVERSITY { .set{ ch_to_diversity_betaord } QIIME2_DIVERSITY_BETAORD ( ch_to_diversity_betaord ) } + + emit: + depth = QIIME2_DIVERSITY_CORE.out.depth + alpha = QIIME2_DIVERSITY_ALPHA.out.alpha + beta = QIIME2_DIVERSITY_BETA.out.beta + betaord = QIIME2_DIVERSITY_BETAORD.out.beta + adonis = QIIME2_DIVERSITY_ADONIS.out.html } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index b0c1bf16..e3a8ea17 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -717,7 +717,7 @@ workflow AMPLISEQ { run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder : [], run_qiime2 && !params.skip_abundance_tables ? "done" : "", run_qiime2 && !params.skip_alpha_rarefaction ? 
"done" : "", - run_qiime2 && !params.skip_diversity_indices && params.metadata ? "done" : "", + run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.depth : [], run_qiime2 && !params.skip_ancom && params.metadata ? "done" : "", params.picrust ? PICRUST.out.pathways : [] // params.qiime_adonis_formula From ad861e9d99abf82593075f54c621904b28f439b8 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 3 Jul 2023 16:15:59 +0200 Subject: [PATCH 064/230] add cutting of dada2 ref tax --- assets/report_template.Rmd | 29 ++++++++++++++++++++++++- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 2 ++ subworkflows/local/dada2_taxonomy_wf.nf | 1 + workflows/ampliseq.nf | 1 + 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 57987f30..7883a65d 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -73,6 +73,7 @@ params: filter_codons: "" stop_codons: "" itsx_cutasv_summary: "" + cut_dada_ref_taxonomy: "" dada2_taxonomy: "" sintax_taxonomy: "" pplace_taxonomy: "" @@ -648,6 +649,7 @@ cat("\n\nITSx results can be found in folder [itsx](../itsx).") ```{r, eval = params$flag_dada2_taxonomy, results='asis'} cat("## DADA2\n") +# indicate reference taxonomy if (!params$flag_ref_tax_user) { cat("The taxonomic classification was performed by DADA2 using the database: ", "\"", params$dada2_ref_tax_title, "\".\n\n", sep = "") @@ -656,6 +658,31 @@ if (!params$flag_ref_tax_user) { "provided by the user.\n\n", sep = "") } +# mention if taxonomy was cut by cutadapt +if ( isTRUE(params$cut_dada_ref_taxonomy != "") ) { + cut_dada_ref_taxonomy <- readLines(params$cut_dada_ref_taxonomy) + for (line in cut_dada_ref_taxonomy){ + if (grepl("Total reads processed:", line)) { + cut_dada_ref_taxonomy_orig <- sub("Total reads processed: *\t*", "", line) + } + if (grepl("Reads written \\(passing filters\\):", line)) { + cut_dada_ref_taxonomy_filt <- sub("Reads written 
.passing filters.: *\t*", "", line) + } + if (grepl("Total basepairs processed:", line)) { + cut_dada_ref_taxonomy_orig_bp <- sub("Total basepairs processed: *\t*", "", line) + } + if (grepl("Total written \\(filtered\\):", line)) { + cut_dada_ref_taxonomy_filt_bp <- sub("Total written \\(filtered\\): *\t*", "", line) + } + } + + cat("The taxonomic reference database was cut by primer sequences to improve matching. + The original database had ",cut_dada_ref_taxonomy_orig," sequences with ",cut_dada_ref_taxonomy_orig_bp, + ", retained were ",cut_dada_ref_taxonomy_filt," sequences that represented ",cut_dada_ref_taxonomy_filt_bp,".\n\n", + sep = "") +} + +# make statistics of taxonomic classification asv_tax <- read.table(params$dada2_taxonomy, header = TRUE, sep = "\t") # Calculate the classified numbers/percent of asv @@ -982,7 +1009,7 @@ cat("This step calculates alpha diversity using various methods and performs pai "(2) Pairwise comparisons of groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each folder named '{method}_distance_matrix-{treatment}' contains an 'index.html' that allows to view the result of the statistical test for the diversity method between treatments.", sep = "\n") #TODO: adonis test is missing here! 
-#TODO: report rarefaction depth, note phylogenetic tree & phylogenetic placement +#TODO: note phylogenetic tree & phylogenetic placement ``` ```{r, eval = !params$flag_skip_ancom, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index 7adf66d8..f9aa155e 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -47,6 +47,7 @@ option_list = list( make_option(c("--itsx_cutasv_summary"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--cut_its"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada2_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--cut_dada_ref_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--sintax_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), @@ -119,6 +120,7 @@ rmarkdown::render(opt$report, output_file = opt$output, cut_its = opt$cut_its, cut_its = opt$cut_its, dada2_ref_tax_title = opt$dada2_ref_tax_title, + cut_dada_ref_taxonomy = opt$cut_dada_ref_taxonomy, dada2_taxonomy = opt$dada2_taxonomy, flag_dada2_taxonomy = opt$flag_dada2_taxonomy, flag_sintax_taxonomy = opt$flag_sintax_taxonomy, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 9fd5c7dd..f69ff3ce 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -32,6 +32,7 @@ process SUMMARY_REPORT { path(filter_codons_stats) path(itsx_cutasv_summary) path(dada2_tax) + tuple val(meta_ref), path(cut_dada_ref_taxonomy) // cutadapt log when params.cut_dada_ref_taxonomy path(sintax_tax) path(pplace_tax) path(qiime2_tax) @@ -80,6 
+81,7 @@ process SUMMARY_REPORT { def itsx_cutasv = itsx_cutasv_summary ? "--itsx_cutasv_summary $itsx_cutasv_summary --cut_its $params.cut_its" : "--cut_its none" def dada2_taxonomy = !dada2_tax ? "" : params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --dada2_ref_tax_title '${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'" + dada2_taxonomy += cut_dada_ref_taxonomy ? " --cut_dada_ref_taxonomy $cut_dada_ref_taxonomy" : "" def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax --sintax_ref_tax_title '${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "" def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax --qiime2_ref_tax_title '${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "" diff --git a/subworkflows/local/dada2_taxonomy_wf.nf b/subworkflows/local/dada2_taxonomy_wf.nf index c5259e6c..15f4820e 100644 --- a/subworkflows/local/dada2_taxonomy_wf.nf +++ b/subworkflows/local/dada2_taxonomy_wf.nf @@ -104,6 +104,7 @@ workflow DADA2_TAXONOMY_WF { } emit: + cut_tax = params.cut_dada_ref_taxonomy ? CUTADAPT_TAXONOMY.out.log : [] tax = ch_dada2_tax versions = ch_versions_dada_taxonomy } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index e3a8ea17..727d5e4d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -707,6 +707,7 @@ workflow AMPLISEQ { params.filter_codons ? FILTER_CODONS.out.stats : [], params.cut_its != "none" ? ITSX_CUTASV.out.summary : [], !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], + !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? DADA2_TAXONOMY_WF.out.cut_tax : [], !params.skip_taxonomy && params.sintax_ref_taxonomy ? 
ch_sintax_tax : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], From be55df70be895ae78fbae3742ab21c5c7363bebc Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 3 Jul 2023 16:29:09 +0200 Subject: [PATCH 065/230] review output --- assets/report_template.Rmd | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 7883a65d..d749a933 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -131,7 +131,7 @@ cutadapt_summary <- read.table(file = params$ca_sum_path, header = TRUE, sep = " passed_col <- as.numeric(substr( cutadapt_summary$cutadapt_passing_filters_percent, 1, 4)) -max_disc <- 100 - min(passed_col) +max_disc <- round( 100 - min(passed_col), 1 ) avg_passed <- round(mean(passed_col),1) cutadapt_text_unch <- "Primers were trimmed using cutadapt" @@ -739,7 +739,6 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in cat("## QIIME2\n") cat("The taxonomic classification was performed by QIIME2 using the database: \"", params$qiime2_ref_tax_title, "\".\n\n", sep = "") -#TODO: only tested for greengenes85, need to test also UNITE and SILVA! # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") @@ -922,7 +921,7 @@ flag_filter_stats_tsv <- isTRUE(params$filter_stats_tsv != "") cat("## ASV filtering\n", "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA, for 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. 
") if ( params$exclude_taxa != "none" ) { - cat("ASVs were removed when the taxonomic string contanied any of", params$exclude_taxa) + cat("ASVs were removed when the taxonomic string contanied any of '", params$exclude_taxa, "' (comma separated)") } if ( params$min_frequency != 1 ) { cat(", had fewer than", params$min_frequency ,"total read counts over all sample") From 6d242284c90aa82f7da1aa1283ea27558823f96c Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 3 Jul 2023 17:12:23 +0200 Subject: [PATCH 066/230] export svgs --- assets/report_template.Rmd | 60 ++++++++++++++++++++++++++++----- modules/local/summary_report.nf | 5 +-- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index d749a933..8c24fbe0 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -333,7 +333,7 @@ if ( params$flag_single_end ) { dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) } else { # paired end - cat("Stacked barcharts of read pair numbers (denoisedF & denoisedR halfed, because each pair is split) per sample and processing stage (see above):\n\n") + cat("Stacked barchart of read pair numbers (denoisedF & denoisedR halfed, because each pair is split) per sample and processing stage (see above):\n\n") dada_stats_ex <- data.frame(sample = dada_stats$sample, DADA2_input = dada_stats$DADA2_input, @@ -369,12 +369,18 @@ if ( params$flag_single_end ) { # Plot dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stats_p_t$steps_t)) -ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + + +plot_dada_stats_p_t <- ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + geom_bar(position = "fill", stat = "identity") + xlab("Samples") + ylab("Fraction of total reads") + coord_flip() + scale_fill_brewer("Filtering Steps", palette = "Spectral") +plot_dada_stats_p_t + +svg("stacked_barchart_of_reads.svg") +plot_dada_stats_p_t +dev.off() ``` 
The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. @@ -449,13 +455,18 @@ cat( barrnap_df_sum$count[4], "(", barrnap_df_sum$percent[4],"%) ASVs to Eukaryo cat( barrnap_df_sum$count[5], "(", barrnap_df_sum$percent[5],"%) were below similarity threshold to any kingdom." ) # Barplot -ggplot(barrnap_df_sum, +plot_barrnap_df_sum <- ggplot(barrnap_df_sum, aes(x = reorder(label, desc(label)), y = percent)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + xlab("rRNA origins") + coord_flip() + theme_bw() +plot_barrnap_df_sum + +svg("rrna_detection_with_barrnap.svg") +plot_barrnap_df_sum +dev.off() cat("\n\nrRNA filter results can be found in folder [barrnap](../barrnap).") ``` @@ -516,13 +527,18 @@ if ( params$max_len_asv != 0 ) { filter_len_asv_filtered <- subset(filter_len_asv_filtered, Length <= params$max_len_asv) } -ggplot(filter_len_profile, +plot_filter_len_profile <- ggplot(filter_len_profile, aes(x = Length, y = Counts)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("Number of ASVs") + xlab("Length") + coord_flip() + theme_bw() +plot_filter_len_profile + +svg("asv_length_profile_before_length_filter.svg") +plot_filter_len_profile +dev.off() # Reads removed @@ -635,13 +651,18 @@ itsx_origins$percent <- round( itsx_origins$count / itsx_summary_nasv * 100, 2) cat(itsx_summary_its, "of",itsx_summary_nasv,"(",round( itsx_summary_its/itsx_summary_nasv*100 ,2),"%) ASVs were identified as ITS.", "The following plot shows ITS sequences by preliminary origin:") -ggplot(itsx_origins, +plot_itsx_origins <- ggplot(itsx_origins, aes(x = origin, y = percent)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("%") + xlab("ITS sequences by preliminary origin") + coord_flip() + theme_bw() +plot_itsx_origins + +svg("itsx_preliminary_origin.svg") +plot_itsx_origins +dev.off() cat("\n\nITSx results can be found in 
folder [itsx](../itsx).") ``` @@ -723,13 +744,18 @@ cat(outputstr) # Barplot # Plot asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level) -ggplot(asv_classi_df, +plot_asv_classi_df <- ggplot(asv_classi_df, aes(x = reorder(level, desc(level)), y = p_asv_classified)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + xlab("Levels") + coord_flip() + theme_bw() +plot_asv_classi_df + +svg("dada2_taxonomic_classification_per_taxonomy_level.svg") +plot_asv_classi_df +dev.off() cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in files 'ASV_tax_*.tsv'.") ``` @@ -784,13 +810,18 @@ cat(outputstr) # Barplot # Plot asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level) -ggplot(asv_classi_df, +plot_asv_classi_df <- ggplot(asv_classi_df, aes(x = reorder(level, desc(level)), y = p_asv_classified)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + xlab("Levels") + coord_flip() + theme_bw() +plot_asv_classi_df + +svg("qiime2_taxonomic_classification_per_taxonomy_level.svg") +plot_asv_classi_df +dev.off() cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](../qiime2/taxonomy).") ``` @@ -841,13 +872,18 @@ cat(outputstr) # Barplot # Plot asv_classi_df$level <- factor(asv_classi_df$level, levels = asv_classi_df$level) -ggplot(asv_classi_df, +plot_asv_classi_df <- ggplot(asv_classi_df, aes(x = reorder(level, desc(level)), y = p_asv_classified)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + xlab("Levels") + coord_flip() + theme_bw() +plot_asv_classi_df + +svg("sintax_taxonomic_classification_per_taxonomy_level.svg") +plot_asv_classi_df +dev.off() cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax).") ``` @@ -893,13 +929,18 @@ cat(outputstr) # Barplot # Plot asv_classi_df$level <- 
factor(asv_classi_df$level, levels = asv_classi_df$level) -ggplot(asv_classi_df, +plot_asv_classi_df <- ggplot(asv_classi_df, aes(x = reorder(level, desc(level)), y = p_asv_classified)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("% Classification") + xlab("Taxonomic levels") + coord_flip() + theme_bw() +plot_asv_classi_df + +svg("phylogenetic_placement_taxonomic_classification_per_taxonomy_level.svg") +plot_asv_classi_df +dev.off() #TODO: *.heattree.tree.svg could be displayed as well! @@ -1035,6 +1076,7 @@ cat("## PICRUSt2\n", This report (file 'summary_report.html') is located in folder [summary_report](.) of the original pipeline results folder. In this file, all links to files and folders are relative, therefore hyperlinks will only work when the report is at its original place in the pipeline results folder. +Plots specifically produced for this report (if any) can be also found in folder [summary_report](.). A comprehensive read count report throughout the pipeline can be found in the [base results folder](../) in file 'overall_summary.tsv'. Technical information to the pipeline run are collected in folder [pipeline_info](../pipeline_info). 
diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index f69ff3ce..2d313165 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -49,8 +49,9 @@ process SUMMARY_REPORT { output: - path "summary_report.html" , emit: report - path "versions.yml" , emit: versions + path "*.svg" , emit: svg, optional: true + path "summary_report.html" , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when From 313e131403905c5ece66180fbf6dde5b15c9bc5e Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 4 Jul 2023 12:23:51 +0200 Subject: [PATCH 067/230] fix channel issues --- subworkflows/local/dada2_taxonomy_wf.nf | 2 +- subworkflows/local/qiime2_diversity.nf | 10 +++++----- workflows/ampliseq.nf | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/dada2_taxonomy_wf.nf b/subworkflows/local/dada2_taxonomy_wf.nf index 15f4820e..9673b45e 100644 --- a/subworkflows/local/dada2_taxonomy_wf.nf +++ b/subworkflows/local/dada2_taxonomy_wf.nf @@ -104,7 +104,7 @@ workflow DADA2_TAXONOMY_WF { } emit: - cut_tax = params.cut_dada_ref_taxonomy ? CUTADAPT_TAXONOMY.out.log : [] + cut_tax = params.cut_dada_ref_taxonomy ? CUTADAPT_TAXONOMY.out.log : [[],[]] tax = ch_dada2_tax versions = ch_versions_dada_taxonomy } diff --git a/subworkflows/local/qiime2_diversity.nf b/subworkflows/local/qiime2_diversity.nf index e5645072..b305168f 100644 --- a/subworkflows/local/qiime2_diversity.nf +++ b/subworkflows/local/qiime2_diversity.nf @@ -73,9 +73,9 @@ workflow QIIME2_DIVERSITY { } emit: - depth = QIIME2_DIVERSITY_CORE.out.depth - alpha = QIIME2_DIVERSITY_ALPHA.out.alpha - beta = QIIME2_DIVERSITY_BETA.out.beta - betaord = QIIME2_DIVERSITY_BETAORD.out.beta - adonis = QIIME2_DIVERSITY_ADONIS.out.html + depth = !skip_diversity_indices ? QIIME2_DIVERSITY_CORE.out.depth : [] + alpha = !skip_diversity_indices ? QIIME2_DIVERSITY_ALPHA.out.alpha : [] + beta = !skip_diversity_indices ? 
QIIME2_DIVERSITY_BETA.out.beta : [] + betaord = !skip_diversity_indices ? QIIME2_DIVERSITY_BETAORD.out.beta : [] + adonis = params.qiime_adonis_formula ? QIIME2_DIVERSITY_ADONIS.out.html : [] } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 727d5e4d..89d29656 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -707,7 +707,7 @@ workflow AMPLISEQ { params.filter_codons ? FILTER_CODONS.out.stats : [], params.cut_its != "none" ? ITSX_CUTASV.out.summary : [], !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], - !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? DADA2_TAXONOMY_WF.out.cut_tax : [], + !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? DADA2_TAXONOMY_WF.out.cut_tax : [[],[]], !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], From df9c3651cfa83fd0c6a7888bd99dff1c6c28ac6d Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 4 Jul 2023 13:29:26 +0200 Subject: [PATCH 068/230] add heattree --- assets/report_template.Rmd | 12 +++++++++--- bin/generate_report.R | 2 ++ modules/local/summary_report.nf | 3 ++- workflows/ampliseq.nf | 1 + 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8c24fbe0..7e7277a4 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -77,6 +77,7 @@ params: dada2_taxonomy: "" sintax_taxonomy: "" pplace_taxonomy: "" + pplace_heattree: "" qiime2_taxonomy: "" filter_stats_tsv: "" diversity_indices_depth: "" @@ -892,7 +893,7 @@ cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax). 
# Header cat("## Phylogenetic Placement\n", "Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations. The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons. It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. ", - "Extraction of taxonomic classification wads performed with EPA-NG and GAPPA.") + "Extraction of taxonomic classification wads performed with EPA-NG and GAPPA. ") # Read file and prepare table asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") @@ -942,8 +943,14 @@ svg("phylogenetic_placement_taxonomic_classification_per_taxonomy_level.svg") plot_asv_classi_df dev.off() -#TODO: *.heattree.tree.svg could be displayed as well! +cat("\n\nHeattree of the phylogenetic placement:") +``` + +```{r, eval = params$flag_pplace_taxonomy, out.width="100%", fig.show='hold', fig.align='default'} +knitr::include_graphics(c(params$pplace_heattree)) +``` +```{r, eval = params$flag_pplace_taxonomy, results='asis'} cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [pplace](../pplace) in file '*.taxonomy.per_query_unique.tsv'.") ``` @@ -1049,7 +1056,6 @@ cat("This step calculates alpha diversity using various methods and performs pai "(2) Pairwise comparisons of groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each folder named '{method}_distance_matrix-{treatment}' contains an 'index.html' that allows to view the result of the statistical test for the diversity method between treatments.", sep = "\n") #TODO: adonis test is missing here! 
-#TODO: note phylogenetic tree & phylogenetic placement ``` ```{r, eval = !params$flag_skip_ancom, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index f9aa155e..9acb3900 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -54,6 +54,7 @@ option_list = list( make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_sintax_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--pplace_heattree"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_pplace_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), make_option(c("--val_used_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), @@ -128,6 +129,7 @@ rmarkdown::render(opt$report, output_file = opt$output, sintax_taxonomy = opt$sintax_taxonomy, flag_pplace_taxonomy = opt$flag_pplace_taxonomy, pplace_taxonomy = opt$pplace_taxonomy, + pplace_heattree = opt$pplace_heattree, flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, val_used_taxonomy = opt$val_used_taxonomy, qiime2_ref_tax_title = opt$qiime2_ref_tax_title, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 2d313165..34c31bf8 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -35,6 +35,7 @@ process SUMMARY_REPORT { tuple val(meta_ref), path(cut_dada_ref_taxonomy) // cutadapt log when params.cut_dada_ref_taxonomy path(sintax_tax) path(pplace_tax) + tuple val(meta_pplace), path(pplace_heattree) path(qiime2_tax) val(run_qiime2) val(val_used_taxonomy) @@ -84,7 +85,7 @@ process 
SUMMARY_REPORT { params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --dada2_ref_tax_title '${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'" dada2_taxonomy += cut_dada_ref_taxonomy ? " --cut_dada_ref_taxonomy $cut_dada_ref_taxonomy" : "" def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax --sintax_ref_tax_title '${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "" - def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax" : "" + def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax --pplace_heattree $pplace_heattree" : "" def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax --qiime2_ref_tax_title '${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "" def qiime2 = run_qiime2 ? "--val_used_taxonomy '$val_used_taxonomy'" : "--flag_skip_qiime" qiime2 += filter_stats_tsv ? " --filter_stats_tsv $filter_stats_tsv --qiime2_filtertaxa '$qiime2_filtertaxa' --exclude_taxa $params.exclude_taxa --min_frequency $params.min_frequency --min_samples $params.min_samples" : "" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 89d29656..4075f5c2 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -710,6 +710,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? DADA2_TAXONOMY_WF.out.cut_tax : [[],[]], !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], + !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree : [[],[]], !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], run_qiime2, run_qiime2 ? 
val_used_taxonomy : "", From b10924951888b30fa9fae9ae2ce64014d75ea198 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 4 Jul 2023 14:23:14 +0200 Subject: [PATCH 069/230] incremental visual improvements --- assets/report_template.Rmd | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 7e7277a4..397b93f7 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -312,7 +312,7 @@ replicates due to high impact of stochasticity. if ( params$flag_single_end ) { # single end - cat("Stacked barcharts of read numbers per sample and processing stage (see above):\n\n") + cat("Stacked barcharts of read numbers per sample and processing stage") dada_stats_ex <- data.frame(sample = dada_stats$sample, input = dada_stats$DADA2_input, @@ -321,8 +321,14 @@ if ( params$flag_single_end ) { nonchim = dada_stats$denoised-dada_stats$nonchim, analysis = dada_stats$nonchim) dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:6]/dada_stats_ex$input*100, 2)) - n_samples <- length(dada_stats_p$sample) + # If more than 20 sample only display subset! 
+ if ( nrow(dada_stats_p)>=20 ) { + cat(" (display 10 samples of each lowest and highest percentage of reads analysed, of",nrow(dada_stats_p),"samples)") + dada_stats_p <- dada_stats_p[order(-dada_stats_p$analysis),] + dada_stats_p <- rbind(head(dada_stats_p,10),tail(dada_stats_p,10)) + } # Stack columns for both stacked barcharts + n_samples <- length(dada_stats_p$sample) samples_t <- c(rep(dada_stats_p$sample, 4)) steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoised", n_samples), rep("excluded by nonchim", n_samples), rep("reads in final ASVs", n_samples)) @@ -334,7 +340,7 @@ if ( params$flag_single_end ) { dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) } else { # paired end - cat("Stacked barchart of read pair numbers (denoisedF & denoisedR halfed, because each pair is split) per sample and processing stage (see above):\n\n") + cat("Stacked barchart of read pair numbers (denoisedF & denoisedR halfed, because each pair is split) per sample and processing stage") dada_stats_ex <- data.frame(sample = dada_stats$sample, DADA2_input = dada_stats$DADA2_input, @@ -345,6 +351,12 @@ if ( params$flag_single_end ) { nonchim = dada_stats$merged-dada_stats$nonchim, analysis = dada_stats$nonchim) dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input*100, 2)) + # If more than 20 sample only display subset! 
+ if ( nrow(dada_stats_p)>=20 ) { + cat(" (display 10 samples of each lowest and highest percentage of reads analysed, of",nrow(dada_stats_p),"samples)") + dada_stats_p <- dada_stats_p[order(-dada_stats_p$analysis),] + dada_stats_p <- rbind(head(dada_stats_p,10),tail(dada_stats_p,10)) + } # Stack columns for both stacked barcharts n_samples <- length(dada_stats_p$sample) samples_t <- c(rep(dada_stats_p$sample, 6)) @@ -358,6 +370,7 @@ if ( params$flag_single_end ) { asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:8])) dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) } +cat(":\n\n") # Plot #dada_stats_ex_t$steps_t <- factor(dada_stats_ex_t$steps_t, levels=unique(dada_stats_ex_t$steps_t)) @@ -382,6 +395,8 @@ plot_dada_stats_p_t svg("stacked_barchart_of_reads.svg") plot_dada_stats_p_t dev.off() + +cat("\n\nBetween",min(dada_stats_p$analysis),"% and",max(dada_stats_p$analysis),"% reads per sample were retained for analysis within DADA2 steps.") ``` The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. 
@@ -512,7 +527,7 @@ flag_filter_len_asv <- isTRUE(params$filter_len_asv != "") cat("## Sequence length\n") cat("A length filter was used to reduce potential contamination after ASV computation.", - "Before filtering, ASVs had the following length profile:\n\n") + "Before filtering, ASVs had the following length profile (count of 1 was transformed to 1.5 to plot on log10 scale):\n\n") # ASV length profile @@ -528,12 +543,15 @@ if ( params$max_len_asv != 0 ) { filter_len_asv_filtered <- subset(filter_len_asv_filtered, Length <= params$max_len_asv) } +# replace 1 with 1.5 to display on log scale +filter_len_profile$Counts[filter_len_profile$Counts == 1] <- 1.5 + plot_filter_len_profile <- ggplot(filter_len_profile, aes(x = Length, y = Counts)) + geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + ylab("Number of ASVs") + xlab("Length") + - coord_flip() + + scale_y_continuous(trans = "log10") + theme_bw() plot_filter_len_profile @@ -991,6 +1009,9 @@ cat("Consequently,",qiime2_filtertaxa_orig,"ASVs were reduced by",qiime2_filtert # import stats tsv filter_stats_tsv <- read.table(file = params$filter_stats_tsv, header = TRUE, sep = "\t") colnames(filter_stats_tsv) <- gsub("_tax_filter","",colnames(filter_stats_tsv)) +filter_stats_tsv$retained_percent <- round( filter_stats_tsv$retained_percent, 2) +filter_stats_tsv$lost_percent <- round( filter_stats_tsv$lost_percent, 2) +colnames(filter_stats_tsv) <- gsub("_percent","%",colnames(filter_stats_tsv)) # Display table datatable(filter_stats_tsv, options = list( From 4ef20b313db19b912000dbb75708bbdda442672f Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 4 Jul 2023 14:29:30 +0200 Subject: [PATCH 070/230] suppress unwanted output by dev.off() --- assets/report_template.Rmd | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 397b93f7..93ee5ff2 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd 
@@ -394,7 +394,7 @@ plot_dada_stats_p_t svg("stacked_barchart_of_reads.svg") plot_dada_stats_p_t -dev.off() +invisible(dev.off()) cat("\n\nBetween",min(dada_stats_p$analysis),"% and",max(dada_stats_p$analysis),"% reads per sample were retained for analysis within DADA2 steps.") ``` @@ -482,7 +482,7 @@ plot_barrnap_df_sum svg("rrna_detection_with_barrnap.svg") plot_barrnap_df_sum -dev.off() +invisible(dev.off()) cat("\n\nrRNA filter results can be found in folder [barrnap](../barrnap).") ``` @@ -557,7 +557,7 @@ plot_filter_len_profile svg("asv_length_profile_before_length_filter.svg") plot_filter_len_profile -dev.off() +invisible(dev.off()) # Reads removed @@ -681,7 +681,7 @@ plot_itsx_origins svg("itsx_preliminary_origin.svg") plot_itsx_origins -dev.off() +invisible(dev.off()) cat("\n\nITSx results can be found in folder [itsx](../itsx).") ``` @@ -774,7 +774,7 @@ plot_asv_classi_df svg("dada2_taxonomic_classification_per_taxonomy_level.svg") plot_asv_classi_df -dev.off() +invisible(dev.off()) cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in files 'ASV_tax_*.tsv'.") ``` @@ -840,7 +840,7 @@ plot_asv_classi_df svg("qiime2_taxonomic_classification_per_taxonomy_level.svg") plot_asv_classi_df -dev.off() +invisible(dev.off()) cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](../qiime2/taxonomy).") ``` @@ -902,7 +902,7 @@ plot_asv_classi_df svg("sintax_taxonomic_classification_per_taxonomy_level.svg") plot_asv_classi_df -dev.off() +invisible(dev.off()) cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax).") ``` @@ -959,7 +959,7 @@ plot_asv_classi_df svg("phylogenetic_placement_taxonomic_classification_per_taxonomy_level.svg") plot_asv_classi_df -dev.off() +invisible(dev.off()) cat("\n\nHeattree of the phylogenetic placement:") ``` From 8a41991e7351988bd742dba4a22dec40c4a8c177 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 4 Jul 2023 15:34:40 +0200 Subject: [PATCH 071/230] add adonis --- 
assets/report_template.Rmd | 38 +++++++++++++++++++++++---------- bin/generate_report.R | 4 ++++ modules/local/summary_report.nf | 2 ++ workflows/ampliseq.nf | 1 + 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 93ee5ff2..7e87ffaa 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -50,6 +50,7 @@ params: min_samples: "" qiime2_filtertaxa: "" val_used_taxonomy: "" + qiime_adonis_formula: "" # file paths mqc_plot: "" @@ -81,6 +82,7 @@ params: qiime2_taxonomy: "" filter_stats_tsv: "" diversity_indices_depth: "" + diversity_indices_adonis: "" picrust_pathways: "" --- @@ -155,16 +157,6 @@ datatable(cutadapt_summary, options = list( scrollY = "300px", paging = FALSE)) -# Barplot TODO: currently skipper, because this is already in the table -#cutadapt_summary$passed_num <- passed_col -#ggplot(cutadapt_summary, -# aes(x = sample, y = passed_col)) + -# geom_bar(stat = "identity", fill = rgb(0.1, 0.4, 0.75), width = 0.5) + -# ylab("% sequencing reads passing filters of cutadapt") + -# xlab("Samples") + -# coord_flip() + -# theme_bw() - cat("\n\nCutadapt results can be found in folder [cutadapt](../cutadapt).") ``` @@ -1076,7 +1068,31 @@ cat("This step calculates alpha diversity using various methods and performs pai "- weighted UniFrac distance (quantitative, phylogenetic): [qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html)\n", "(2) Pairwise comparisons of groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each folder named '{method}_distance_matrix-{treatment}' contains an 'index.html' that allows to view the result of the statistical test for the diversity method between treatments.", sep = "\n") -#TODO: adonis test is missing here! 
+``` + +```{r, results='asis'} +flag_qiime_adonis_formula <- isTRUE(params$qiime_adonis_formula != "") +``` + +```{r, eval = params$flag_qiime_adonis_formula, results='asis'} +#qiime_adonis_formula +#diversity_indices_adonis +cat("** ADONIS test for beta diversity**\n\n") +cat("Permutational multivariate analysis of variance using distance matrices (adonis) + determines whether groups of samples are significantly different from one another. + The formula was '",params$qiime_adonis_formula,"' (multiple formulas are comma separated). + ADONIS computes an R2 value (effect size) which shows the percentage of variation explained + by a condition, as well as a p-value to determine the statistical significance. + The sequence of conditions in the formula matters, the variance of factors is removed + (statistically controlled for) from beginning to end of the formula. " ) + +cat("\n\nTest results are in separate folders following the scheme '{method}_distance_matrix-{adonis formula}':\n") +diversity_indices_adonis <- unlist( strsplit( params$diversity_indices_adonis,"," ) ) +for (folder in diversity_indices_adonis) { + adonis_index_path <- paste0("qiime2/diversity/beta_diversity/adonis/",folder) + cat("\n- [",adonis_index_path,"/index.html](../",adonis_index_path,"/index.html)\n", sep="") +} + ``` ```{r, eval = !params$flag_skip_ancom, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index 9acb3900..27a2ec99 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -71,6 +71,8 @@ option_list = list( make_option(c("--flag_skip_alpha_rarefaction"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_diversity_indices"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--diversity_indices_depth"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + 
make_option(c("--diversity_indices_adonis"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--qiime_adonis_formula"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_skip_ancom"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--picrust_pathways"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) @@ -145,5 +147,7 @@ rmarkdown::render(opt$report, output_file = opt$output, flag_skip_alpha_rarefaction = opt$flag_skip_alpha_rarefaction, flag_skip_diversity_indices = opt$flag_skip_diversity_indices, diversity_indices_depth = opt$diversity_indices_depth, + diversity_indices_adonis = opt$diversity_indices_adonis, + qiime_adonis_formula = opt$qiime_adonis_formula, flag_skip_ancom = opt$flag_skip_ancom, picrust_pathways = opt$picrust_pathways)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 34c31bf8..3eb85bae 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -45,6 +45,7 @@ process SUMMARY_REPORT { val(abundance_tables) val(alpha_rarefaction) path(diversity_indices) + path(diversity_indices_adonis) val(ancom) path(picrust_pathways) @@ -93,6 +94,7 @@ process SUMMARY_REPORT { qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" qiime2 += diversity_indices ? " --diversity_indices_depth $diversity_indices" : " --flag_skip_diversity_indices" + qiime2 += diversity_indices_adonis ? " --diversity_indices_adonis '"+ diversity_indices_adonis.join(",") +"' --qiime_adonis_formula $params.qiime_adonis_formula" : "" qiime2 += ancom ? "" : " --flag_skip_ancom" def picrust = picrust_pathways ? 
"--picrust_pathways $picrust_pathways" : "" """ diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 4075f5c2..6c1f1816 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -720,6 +720,7 @@ workflow AMPLISEQ { run_qiime2 && !params.skip_abundance_tables ? "done" : "", run_qiime2 && !params.skip_alpha_rarefaction ? "done" : "", run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.depth : [], + run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect() : [], run_qiime2 && !params.skip_ancom && params.metadata ? "done" : "", params.picrust ? PICRUST.out.pathways : [] // params.qiime_adonis_formula From 5d9324b35937ee34ad1dc842ed3dbe9ab8aaf0dc Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 4 Jul 2023 15:56:32 +0200 Subject: [PATCH 072/230] more detailed ancom results --- assets/report_template.Rmd | 27 ++++++++++++++++++++------- bin/generate_report.R | 4 ++-- modules/local/summary_report.nf | 4 ++-- subworkflows/local/qiime2_ancom.nf | 3 +++ workflows/ampliseq.nf | 3 +-- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 7e87ffaa..fc9533e7 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -31,7 +31,7 @@ params: flag_skip_abundance_tables: FALSE flag_skip_alpha_rarefaction: FALSE flag_skip_diversity_indices: FALSE - flag_skip_ancom: FALSE + ancom: FALSE trunclenf: "" trunclenr: "" max_ee: "" @@ -1077,7 +1077,7 @@ flag_qiime_adonis_formula <- isTRUE(params$qiime_adonis_formula != "") ```{r, eval = params$flag_qiime_adonis_formula, results='asis'} #qiime_adonis_formula #diversity_indices_adonis -cat("** ADONIS test for beta diversity**\n\n") +cat(" **ADONIS test for beta diversity**\n\n") cat("Permutational multivariate analysis of variance using distance matrices (adonis) determines whether groups of samples are significantly different from one another. 
The formula was '",params$qiime_adonis_formula,"' (multiple formulas are comma separated). @@ -1087,19 +1087,32 @@ cat("Permutational multivariate analysis of variance using distance matrices (ad (statistically controlled for) from beginning to end of the formula. " ) cat("\n\nTest results are in separate folders following the scheme '{method}_distance_matrix-{adonis formula}':\n") -diversity_indices_adonis <- unlist( strsplit( params$diversity_indices_adonis,"," ) ) +diversity_indices_adonis <- sort( unlist( strsplit( params$diversity_indices_adonis,"," ) ) ) for (folder in diversity_indices_adonis) { adonis_index_path <- paste0("qiime2/diversity/beta_diversity/adonis/",folder) cat("\n- [",adonis_index_path,"/index.html](../",adonis_index_path,"/index.html)\n", sep="") } +``` +```{r, results='asis'} +flag_ancom <- isTRUE(params$ancom != "") ``` -```{r, eval = !params$flag_skip_ancom, results='asis'} -cat("## ANCOM\n", - "Analysis of Composition of Microbiomes (ANCOM) is applied to identify features that are differentially abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) will be differentially abundant between groups otherwise the method will be inaccurate.", - "Comparisons between groups of samples is performed for specific metadata that can be found in folder [qiime2/ancom/](../qiime2/ancom/). Each folder named 'Category-{treatment}-{taxonomic level}' contains an 'index.html' that allows to view the result of the statistical test between treatments.", +```{r, eval = flag_ancom, results='asis'} +cat("## ANCOM\n\n") +cat("Analysis of Composition of Microbiomes (ANCOM) is applied to identify features that are differentially + abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) + will be differentially abundant between groups otherwise the method will be inaccurate. 
+ Comparisons between groups of samples is performed for specific metadata that can be found in folder + [qiime2/ancom/](../qiime2/ancom/). ", sep = "\n") + +cat("\n\nTest results are in separate folders following the scheme 'Category-{treatment}-{taxonomic level}':\n") +ancom <- sort( unlist( strsplit( params$ancom,"," ) ) ) +for (folder in ancom) { + ancom_path <- paste0("qiime2/ancom/",folder) + cat("\n- [",ancom_path,"/index.html](../",ancom_path,"/index.html)\n", sep="") +} ``` ```{r, results='asis'} diff --git a/bin/generate_report.R b/bin/generate_report.R index 27a2ec99..442deaac 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -73,7 +73,7 @@ option_list = list( make_option(c("--diversity_indices_depth"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--diversity_indices_adonis"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--qiime_adonis_formula"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--flag_skip_ancom"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--ancom"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--picrust_pathways"), type="character", default=NULL, help="MultiQC plots", metavar="character") ) @@ -149,5 +149,5 @@ rmarkdown::render(opt$report, output_file = opt$output, diversity_indices_depth = opt$diversity_indices_depth, diversity_indices_adonis = opt$diversity_indices_adonis, qiime_adonis_formula = opt$qiime_adonis_formula, - flag_skip_ancom = opt$flag_skip_ancom, + ancom = opt$ancom, picrust_pathways = opt$picrust_pathways)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 3eb85bae..ceff3b2c 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -46,7 +46,7 @@ process SUMMARY_REPORT { val(alpha_rarefaction) 
path(diversity_indices) path(diversity_indices_adonis) - val(ancom) + path(ancom) path(picrust_pathways) @@ -95,7 +95,7 @@ process SUMMARY_REPORT { qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" qiime2 += diversity_indices ? " --diversity_indices_depth $diversity_indices" : " --flag_skip_diversity_indices" qiime2 += diversity_indices_adonis ? " --diversity_indices_adonis '"+ diversity_indices_adonis.join(",") +"' --qiime_adonis_formula $params.qiime_adonis_formula" : "" - qiime2 += ancom ? "" : " --flag_skip_ancom" + qiime2 += ancom ? " --ancom '"+ ancom.join(",") +"'" : "" def picrust = picrust_pathways ? "--picrust_pathways $picrust_pathways" : "" """ generate_report.R --report $report_template \\ diff --git a/subworkflows/local/qiime2_ancom.nf b/subworkflows/local/qiime2_ancom.nf index af83733d..ce308d78 100644 --- a/subworkflows/local/qiime2_ancom.nf +++ b/subworkflows/local/qiime2_ancom.nf @@ -34,4 +34,7 @@ workflow QIIME2_ANCOM { QIIME2_ANCOM_TAX.out.ancom.subscribe { if ( it.baseName[0].toString().startsWith("WARNING") ) log.warn it.baseName[0].toString().replace("WARNING ","QIIME2_ANCOM_TAX: ") } QIIME2_ANCOM_ASV ( ch_metadata.combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza.flatten() ) ) + + emit: + ancom = QIIME2_ANCOM_ASV.out.ancom.mix(QIIME2_ANCOM_TAX.out.ancom) } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 6c1f1816..8ce8d156 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -721,9 +721,8 @@ workflow AMPLISEQ { run_qiime2 && !params.skip_alpha_rarefaction ? "done" : "", run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.depth : [], run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect() : [], - run_qiime2 && !params.skip_ancom && params.metadata ? "done" : "", + run_qiime2 && !params.skip_ancom && params.metadata ? QIIME2_ANCOM.out.ancom.collect() : [], params.picrust ? 
PICRUST.out.pathways : [] - // params.qiime_adonis_formula ) ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) } From 9d63c9baf9e7277303395a6dc37f232e63bb91c3 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 6 Jul 2023 15:07:38 +0200 Subject: [PATCH 073/230] add barplot_average and statistics for PCoA --- assets/report_template.Rmd | 34 ++++++++++++++++++++++++++------- bin/generate_report.R | 4 ++++ modules/local/summary_report.nf | 6 ++++-- workflows/ampliseq.nf | 1 + 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index fc9533e7..178bd4cb 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -50,6 +50,7 @@ params: min_samples: "" qiime2_filtertaxa: "" val_used_taxonomy: "" + metadata_category_barplot: "" qiime_adonis_formula: "" # file paths @@ -82,6 +83,7 @@ params: qiime2_taxonomy: "" filter_stats_tsv: "" diversity_indices_depth: "" + diversity_indices_beta: "" diversity_indices_adonis: "" picrust_pathways: "" @@ -1031,6 +1033,21 @@ cat("## Barplot\n", "Folder [qiime2/barplot](../qiime2/barplot) contains barplots, click [qiime2/barplot/index.html](../qiime2/barplot/index.html) to open it in your web browser.") ``` +```{r, results='asis'} +flag_metadata_category_barplot <- isTRUE(params$metadata_category_barplot != "") +``` + +```{r, eval = params$flag_metadata_category_barplot, results='asis'} +cat("\n\nAdditionally, barplots with average relative abundance values were produced for", + params$metadata_category_barplot,"(comma separated if several) in [qiime2/barplot_average](../qiime2/barplot_average) + in separate folders following the scheme 'barplot_{treatment}':\n") +metadata_category_barplot <- sort( unlist( strsplit( params$metadata_category_barplot,"," ) ) ) +for (category in metadata_category_barplot) { + barplot_folder_path <- paste0("qiime2/barplot_average/barplot_",category) + cat("\n- 
[",barplot_folder_path,"/index.html](../",barplot_folder_path,"/index.html)\n", sep="") +} +``` + ```{r, eval = !params$flag_skip_alpha_rarefaction, results='asis'} cat("## Alpha diversity rarefaction curves\n", "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not becomes horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ") @@ -1061,13 +1078,18 @@ cat("This step calculates alpha diversity using various methods and performs pai "\n### Beta diversity indices\n", "Beta diversity measures the species community differences between samples. This step calculates beta diversity distances using various methods and performs pairwise comparisons of groups of samples. Additionally, principle coordinates analysis (PCoA) plots are produced that can be visualized with Emperor in your default browser without the need for installation. This calculations are based on a phylogenetic tree of all ASV sequences. 
", "Folder [qiime2/diversity/beta_diversity](../qiime2/diversity/beta_diversity) contains the beta-diverity data:\n", - "(1) PCoA for four different beta diversity distances are accessible via:", + "1 PCoA for four different beta diversity distances are accessible via:\n", "- Bray-Curtis distance (quantitative): [qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html)\n", "- Jaccard distance (qualitative): [qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html)\n", "- unweighted UniFrac distance (qualitative, phylogenetic) [qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html)\n", "- weighted UniFrac distance (quantitative, phylogenetic): [qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html)\n", - "(2) Pairwise comparisons of groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each folder named '{method}_distance_matrix-{treatment}' contains an 'index.html' that allows to view the result of the statistical test for the diversity method between treatments.", + "2 Pairwise comparisons between groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). 
Each significance test result is in its separate folder following the scheme '{method}_distance_matrix-{treatment}':", sep = "\n") +diversity_indices_beta <- sort( unlist( strsplit( params$diversity_indices_beta,"," ) ) ) +for (folder in diversity_indices_beta) { + beta_folder_path <- paste0("qiime2/diversity/",folder) #"beta_diversity/" is defined in input section with "stageAs: 'beta_diversity/*'" + cat("\n- [",beta_folder_path,"/index.html](../",beta_folder_path,"/index.html)\n", sep="") +} ``` ```{r, results='asis'} @@ -1075,13 +1097,11 @@ flag_qiime_adonis_formula <- isTRUE(params$qiime_adonis_formula != "") ``` ```{r, eval = params$flag_qiime_adonis_formula, results='asis'} -#qiime_adonis_formula -#diversity_indices_adonis -cat(" **ADONIS test for beta diversity**\n\n") +cat("_ADONIS test for beta diversity_\n\n") cat("Permutational multivariate analysis of variance using distance matrices (adonis) determines whether groups of samples are significantly different from one another. The formula was '",params$qiime_adonis_formula,"' (multiple formulas are comma separated). - ADONIS computes an R2 value (effect size) which shows the percentage of variation explained + adonis computes an R2 value (effect size) which shows the percentage of variation explained by a condition, as well as a p-value to determine the statistical significance. The sequence of conditions in the formula matters, the variance of factors is removed (statistically controlled for) from beginning to end of the formula. 
" ) @@ -1089,7 +1109,7 @@ cat("Permutational multivariate analysis of variance using distance matrices (ad cat("\n\nTest results are in separate folders following the scheme '{method}_distance_matrix-{adonis formula}':\n") diversity_indices_adonis <- sort( unlist( strsplit( params$diversity_indices_adonis,"," ) ) ) for (folder in diversity_indices_adonis) { - adonis_index_path <- paste0("qiime2/diversity/beta_diversity/adonis/",folder) + adonis_index_path <- paste0("qiime2/diversity/",folder) #"beta_diversity/" is defined in input section with "stageAs: 'beta_diversity/adonis/*'" cat("\n- [",adonis_index_path,"/index.html](../",adonis_index_path,"/index.html)\n", sep="") } ``` diff --git a/bin/generate_report.R b/bin/generate_report.R index 442deaac..b5b299eb 100755 --- a/bin/generate_report.R +++ b/bin/generate_report.R @@ -67,10 +67,12 @@ option_list = list( make_option(c("--min_frequency"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--min_samples"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--flag_skip_barplot"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), + make_option(c("--metadata_category_barplot"), type="character", default=NULL, help="Downstream analysis with QIIME2", metavar="character"), make_option(c("--flag_skip_abundance_tables"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_alpha_rarefaction"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--flag_skip_diversity_indices"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), make_option(c("--diversity_indices_depth"), type="character", default=NULL, help="MultiQC plots", metavar="character"), + make_option(c("--diversity_indices_beta"), type="character", default=NULL, 
help="MultiQC plots", metavar="character"), make_option(c("--diversity_indices_adonis"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--qiime_adonis_formula"), type="character", default=NULL, help="MultiQC plots", metavar="character"), make_option(c("--ancom"), type="character", default=NULL, help="MultiQC plots", metavar="character"), @@ -143,10 +145,12 @@ rmarkdown::render(opt$report, output_file = opt$output, min_frequency = opt$min_frequency, min_samples = opt$min_samples, flag_skip_barplot = opt$flag_skip_barplot, + metadata_category_barplot = opt$metadata_category_barplot, flag_skip_abundance_tables = opt$flag_skip_abundance_tables, flag_skip_alpha_rarefaction = opt$flag_skip_alpha_rarefaction, flag_skip_diversity_indices = opt$flag_skip_diversity_indices, diversity_indices_depth = opt$diversity_indices_depth, + diversity_indices_beta = opt$diversity_indices_beta, diversity_indices_adonis = opt$diversity_indices_adonis, qiime_adonis_formula = opt$qiime_adonis_formula, ancom = opt$ancom, diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index ceff3b2c..19f80eb7 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -45,7 +45,8 @@ process SUMMARY_REPORT { val(abundance_tables) val(alpha_rarefaction) path(diversity_indices) - path(diversity_indices_adonis) + path(diversity_indices_beta, stageAs: 'beta_diversity/*') // prevent folder name collisons + path(diversity_indices_adonis, stageAs: 'beta_diversity/adonis/*') // prevent folder name collisons path(ancom) path(picrust_pathways) @@ -91,9 +92,10 @@ process SUMMARY_REPORT { def qiime2 = run_qiime2 ? "--val_used_taxonomy '$val_used_taxonomy'" : "--flag_skip_qiime" qiime2 += filter_stats_tsv ? " --filter_stats_tsv $filter_stats_tsv --qiime2_filtertaxa '$qiime2_filtertaxa' --exclude_taxa $params.exclude_taxa --min_frequency $params.min_frequency --min_samples $params.min_samples" : "" qiime2 += barplot ? 
"" : " --flag_skip_barplot" + qiime2 += barplot && params.metadata_category_barplot ? " --metadata_category_barplot '$params.metadata_category_barplot'" : "" qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" - qiime2 += diversity_indices ? " --diversity_indices_depth $diversity_indices" : " --flag_skip_diversity_indices" + qiime2 += diversity_indices ? " --diversity_indices_depth $diversity_indices --diversity_indices_beta '"+ diversity_indices_beta.join(",") +"'" : " --flag_skip_diversity_indices" qiime2 += diversity_indices_adonis ? " --diversity_indices_adonis '"+ diversity_indices_adonis.join(",") +"' --qiime_adonis_formula $params.qiime_adonis_formula" : "" qiime2 += ancom ? " --ancom '"+ ancom.join(",") +"'" : "" def picrust = picrust_pathways ? "--picrust_pathways $picrust_pathways" : "" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 8ce8d156..cb3740c0 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -720,6 +720,7 @@ workflow AMPLISEQ { run_qiime2 && !params.skip_abundance_tables ? "done" : "", run_qiime2 && !params.skip_alpha_rarefaction ? "done" : "", run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.depth : [], + run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.beta.collect() : [], run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect() : [], run_qiime2 && !params.skip_ancom && params.metadata ? QIIME2_ANCOM.out.ancom.collect() : [], params.picrust ? 
PICRUST.out.pathways : [] From 28d7704d387e3c1b135fb69972874f9bd11d1c9f Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 7 Jul 2023 15:45:30 +0200 Subject: [PATCH 074/230] remove the need for bin/generate_report.R --- assets/report_template.Rmd | 8 +- bin/generate_report.R | 157 -------------------------------- modules/local/summary_report.nf | 143 ++++++++++++++--------------- 3 files changed, 74 insertions(+), 234 deletions(-) delete mode 100755 bin/generate_report.R diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 178bd4cb..1c1fa024 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -50,7 +50,7 @@ params: min_samples: "" qiime2_filtertaxa: "" val_used_taxonomy: "" - metadata_category_barplot: "" + metadata_category_barplot: FALSE qiime_adonis_formula: "" # file paths @@ -1033,11 +1033,7 @@ cat("## Barplot\n", "Folder [qiime2/barplot](../qiime2/barplot) contains barplots, click [qiime2/barplot/index.html](../qiime2/barplot/index.html) to open it in your web browser.") ``` -```{r, results='asis'} -flag_metadata_category_barplot <- isTRUE(params$metadata_category_barplot != "") -``` - -```{r, eval = params$flag_metadata_category_barplot, results='asis'} +```{r, eval = params$metadata_category_barplot, results='asis'} cat("\n\nAdditionally, barplots with average relative abundance values were produced for", params$metadata_category_barplot,"(comma separated if several) in [qiime2/barplot_average](../qiime2/barplot_average) in separate folders following the scheme 'barplot_{treatment}':\n") diff --git a/bin/generate_report.R b/bin/generate_report.R deleted file mode 100755 index b5b299eb..00000000 --- a/bin/generate_report.R +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env Rscript - -library(rmarkdown) -library(optparse) - - -option_list = list( - make_option(c("-r", "--report"), type="character", default=NULL, help="report template file", metavar="character"), - make_option(c("-o", "--output"), type="character", 
default="ampliseq_report.html", help="output file name", metavar="character"), - make_option(c("--skip_fastqc"), action="store_true", default=FALSE, help="Trigger to skip fastqc reporting", metavar="logical"), - make_option(c("--skip_cutadapt"), action="store_true", default=FALSE, help="Trigger to skip cutadapt filtering", metavar="logical"), - make_option(c("--skip_dada_quality"), action="store_true", default=FALSE, help="Trigger to skip dada2 quality plotting", metavar="logical"), - make_option(c("--skip_barrnap"), action="store_true", default=FALSE, help="Trigger to skip barrnap ASV filtering", metavar="logical"), - #make_option(c("--skip_taxonomy"), action="store_true", default=FALSE, help="Trigger to skip taxonomic classification", metavar="logical"), - make_option(c("--retain_untrimmed"), action="store_true", default=FALSE, help="Flag to retain the untrimmed sequences", metavar="logical"), - make_option(c("--ref_tax_user"), action="store_true", default=FALSE, help="Flag that user provided custom db", metavar="logical"), - make_option(c("--single_end"), action="store_true", default=FALSE, help="Flag if single end data is used", metavar="logical"), - make_option(c("--trunclenf"), type="numeric", default=-1, help="Parameter to define truncation in forward strand", metavar="numeric"), - make_option(c("--trunclenr"), type="numeric", default=-1, help="Parameter to define truncation in reverse strand", metavar="numeric"), - make_option(c("--max_ee"), type="numeric", default=-1, help="Parameter to filter reads based on expected errors", metavar="numeric"), - make_option(c("--trunc_qmin"), type="numeric", default=-1, help="Parameter to define truncation via quality measure. Set to -1 if trunclen were given.", metavar="numeric"), - make_option(c("--trunc_rmin"), type="numeric", default=-1, help="Parameter to define truncation via read retaining ratio. 
Set to -1 if trunclen were given.", metavar="numeric"), - make_option(c("--mqc_plot"), type="character", default=NULL, help="MultiQC plot per sequence quality", metavar="character"), - make_option(c("--ca_sum_path"), type="character", default=NULL, help="cutadapt summary table", metavar="character"), - make_option(c("--dada_filtntrim_args"), type="character", default=NULL, help="DADA2 arguments for filter and trim process", metavar="character"), - make_option(c("--dada_qc_f_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_qc_r_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_pp_qc_f_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_pp_qc_r_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_err_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_err_run"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--asv_table_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--path_asv_fa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--path_dada2_tab"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_stats_path"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada_sample_inference"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--filter_ssu"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--filter_ssu_stats"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--filter_ssu_asv"), type="character", default=NULL, 
help="MultiQC plots", metavar="character"), - make_option(c("--path_barrnap_sum"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--filter_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--min_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--max_len_asv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--filter_codons"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--stop_codons"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--filter_len_asv_len_orig"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--itsx_cutasv_summary"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--cut_its"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada2_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--cut_dada_ref_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--dada2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--flag_dada2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), - make_option(c("--sintax_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--sintax_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--flag_sintax_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), - make_option(c("--pplace_taxonomy"), type="character", default=NULL, help="MultiQC plots", 
metavar="character"), - make_option(c("--pplace_heattree"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--flag_pplace_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), - make_option(c("--flag_qiime2_taxonomy"), action="store_true", default=FALSE, help="MultiQC plots", metavar="character"), - make_option(c("--val_used_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--qiime2_ref_tax_title"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--qiime2_taxonomy"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--flag_skip_qiime"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), - make_option(c("--filter_stats_tsv"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--qiime2_filtertaxa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--exclude_taxa"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--min_frequency"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--min_samples"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--flag_skip_barplot"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), - make_option(c("--metadata_category_barplot"), type="character", default=NULL, help="Downstream analysis with QIIME2", metavar="character"), - make_option(c("--flag_skip_abundance_tables"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), - make_option(c("--flag_skip_alpha_rarefaction"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), - 
make_option(c("--flag_skip_diversity_indices"), action="store_true", default=FALSE, help="Downstream analysis with QIIME2", metavar="logical"), - make_option(c("--diversity_indices_depth"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--diversity_indices_beta"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--diversity_indices_adonis"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--qiime_adonis_formula"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--ancom"), type="character", default=NULL, help="MultiQC plots", metavar="character"), - make_option(c("--picrust_pathways"), type="character", default=NULL, help="MultiQC plots", metavar="character") -) - -opt_parser = OptionParser(option_list = option_list) -opt = parse_args(opt_parser) - -rmarkdown::render(opt$report, output_file = opt$output, - params = list( - flag_skip_fastqc = opt$skip_fastqc, - flag_skip_cutadapt = opt$skip_cutadapt, - flag_skip_dada_quality = opt$skip_dada_quality, - flag_skip_barrnap = opt$skip_barrnap, - #flag_skip_taxonomy = opt$skip_taxonomy, - flag_retain_untrimmed = opt$retain_untrimmed, - flag_ref_tax_user = opt$ref_tax_user, - flag_single_end = opt$single_end, - trunclenf = opt$trunclenf, - trunclenr = opt$trunclenr, - max_ee = opt$max_ee, - trunc_qmin = opt$trunc_qmin, - trunc_rmin = opt$trunc_rmin, - mqc_plot = opt$mqc_plot, - ca_sum_path = opt$ca_sum_path, - dada_filtntrim_args = opt$dada_filtntrim_args, - dada_qc_f_path = opt$dada_qc_f_path, - dada_qc_r_path = opt$dada_qc_r_path, - dada_pp_qc_f_path = opt$dada_pp_qc_f_path, - dada_pp_qc_r_path = opt$dada_pp_qc_r_path, - dada_err_path = opt$dada_err_path, - dada_err_run = opt$dada_err_run, - asv_table_path = opt$asv_table_path, - path_asv_fa = opt$path_asv_fa, - path_dada2_tab = opt$path_dada2_tab, - dada_stats_path = opt$dada_stats_path, - 
dada_sample_inference = opt$dada_sample_inference, - filter_ssu = opt$filter_ssu, - filter_ssu_stats = opt$filter_ssu_stats, - filter_ssu_asv = opt$filter_ssu_asv, - path_barrnap_sum = opt$path_barrnap_sum, - filter_len_asv = opt$filter_len_asv, - min_len_asv = opt$min_len_asv, - max_len_asv = opt$max_len_asv, - filter_len_asv_len_orig = opt$filter_len_asv_len_orig, - filter_codons = opt$filter_codons, - stop_codons = opt$stop_codons, - itsx_cutasv_summary = opt$itsx_cutasv_summary, - cut_its = opt$cut_its, - cut_its = opt$cut_its, - dada2_ref_tax_title = opt$dada2_ref_tax_title, - cut_dada_ref_taxonomy = opt$cut_dada_ref_taxonomy, - dada2_taxonomy = opt$dada2_taxonomy, - flag_dada2_taxonomy = opt$flag_dada2_taxonomy, - flag_sintax_taxonomy = opt$flag_sintax_taxonomy, - sintax_ref_tax_title = opt$sintax_ref_tax_title, - sintax_taxonomy = opt$sintax_taxonomy, - flag_pplace_taxonomy = opt$flag_pplace_taxonomy, - pplace_taxonomy = opt$pplace_taxonomy, - pplace_heattree = opt$pplace_heattree, - flag_qiime2_taxonomy = opt$flag_qiime2_taxonomy, - val_used_taxonomy = opt$val_used_taxonomy, - qiime2_ref_tax_title = opt$qiime2_ref_tax_title, - qiime2_taxonomy = opt$qiime2_taxonomy, - flag_skip_qiime = opt$flag_skip_qiime, - filter_stats_tsv = opt$filter_stats_tsv, - qiime2_filtertaxa = opt$qiime2_filtertaxa, - exclude_taxa = opt$exclude_taxa, - min_frequency = opt$min_frequency, - min_samples = opt$min_samples, - flag_skip_barplot = opt$flag_skip_barplot, - metadata_category_barplot = opt$metadata_category_barplot, - flag_skip_abundance_tables = opt$flag_skip_abundance_tables, - flag_skip_alpha_rarefaction = opt$flag_skip_alpha_rarefaction, - flag_skip_diversity_indices = opt$flag_skip_diversity_indices, - diversity_indices_depth = opt$diversity_indices_depth, - diversity_indices_beta = opt$diversity_indices_beta, - diversity_indices_adonis = opt$diversity_indices_adonis, - qiime_adonis_formula = opt$qiime_adonis_formula, - ancom = opt$ancom, - picrust_pathways = 
opt$picrust_pathways)) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 19f80eb7..67f947d1 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -60,79 +60,80 @@ process SUMMARY_REPORT { task.ext.when == null || task.ext.when script: - def single_end = meta.single_end ? "--single_end" : "" - def fastqc = params.skip_fastqc || params.skip_multiqc ? "--skip_fastqc" : "--mqc_plot ${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg" - def cutadapt = params.skip_cutadapt ? "--skip_cutadapt" : - params.retain_untrimmed ? "--retain_untrimmed --ca_sum_path $ca_summary" : - "--ca_sum_path $ca_summary" - // Even when in "dada2_preprocessing.nf" is stated "qc_svg = ch_DADA2_QUALITY1_SVG.collect(sort:true)" the whole path, not only the file name, is used to sort. So FW cannot be guaranteed to be before RV! - def dada_quality = params.skip_dada_quality ? "--skip_dada_quality" : - meta.single_end ? "--dada_qc_f_path $dada_qual_stats --dada_pp_qc_f_path $dada_pp_qual_stats" : - "--dada_qc_f_path 'FW_qual_stats.svg' --dada_qc_r_path 'RV_qual_stats.svg' --dada_pp_qc_f_path 'FW_preprocessed_qual_stats.svg' --dada_pp_qc_r_path 'RV_preprocessed_qual_stats.svg'" - def find_truncation = find_truncation_values ? "--trunc_qmin $params.trunc_qmin --trunc_rmin $params.trunc_rmin" : "" - // make comma separated list of error profile path when multiple sequencing runs were performed - if ( meta.run.size() == 1 && meta.single_end ) { - dada_err = "--dada_err_path $dada_err_svgs --dada_err_run " + meta.run - } else { - dada_err = "--dada_err_path " + dada_err_svgs.join(',') + " --dada_err_run " + meta.run.join(',') - } - def barrnap = params.skip_barrnap ? "--skip_barrnap" : "--path_barrnap_sum $barrnap_summary" - barrnap += filter_ssu_stats ? 
" --filter_ssu_stats $filter_ssu_stats --filter_ssu_asv $filter_ssu_asv --filter_ssu $params.filter_ssu" : " --filter_ssu none" - def filter_len_asv = filter_len_asv_stats ? "--filter_len_asv $filter_len_asv_stats --filter_len_asv_len_orig $filter_len_asv_len_orig" : "" - filter_len_asv += params.min_len_asv ? " --min_len_asv $params.min_len_asv " : " --min_len_asv 0" - filter_len_asv += params.max_len_asv ? " --max_len_asv $params.max_len_asv" : " --max_len_asv 0" - def filter_codons = filter_codons_stats ? "--filter_codons $filter_codons_stats --stop_codons $params.stop_codons" : "" - def itsx_cutasv = itsx_cutasv_summary ? "--itsx_cutasv_summary $itsx_cutasv_summary --cut_its $params.cut_its" : "--cut_its none" - def dada2_taxonomy = !dada2_tax ? "" : - params.dada_ref_tax_custom ? "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --ref_tax_user" : "--flag_dada2_taxonomy --dada2_taxonomy $dada2_tax --dada2_ref_tax_title '${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'" - dada2_taxonomy += cut_dada_ref_taxonomy ? " --cut_dada_ref_taxonomy $cut_dada_ref_taxonomy" : "" - def sintax_taxonomy = sintax_tax ? "--flag_sintax_taxonomy --sintax_taxonomy $sintax_tax --sintax_ref_tax_title '${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "" - def pplace_taxonomy = pplace_tax ? "--flag_pplace_taxonomy --pplace_taxonomy $pplace_tax --pplace_heattree $pplace_heattree" : "" - def qiime2_taxonomy = qiime2_tax ? "--flag_qiime2_taxonomy --qiime2_taxonomy $qiime2_tax --qiime2_ref_tax_title '${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "" - def qiime2 = run_qiime2 ? "--val_used_taxonomy '$val_used_taxonomy'" : "--flag_skip_qiime" - qiime2 += filter_stats_tsv ? " --filter_stats_tsv $filter_stats_tsv --qiime2_filtertaxa '$qiime2_filtertaxa' --exclude_taxa $params.exclude_taxa --min_frequency $params.min_frequency --min_samples $params.min_samples" : "" - qiime2 += barplot ? 
"" : " --flag_skip_barplot" - qiime2 += barplot && params.metadata_category_barplot ? " --metadata_category_barplot '$params.metadata_category_barplot'" : "" - qiime2 += abundance_tables ? "" : " --flag_skip_abundance_tables" - qiime2 += alpha_rarefaction ? "" : " --flag_skip_alpha_rarefaction" - qiime2 += diversity_indices ? " --diversity_indices_depth $diversity_indices --diversity_indices_beta '"+ diversity_indices_beta.join(",") +"'" : " --flag_skip_diversity_indices" - qiime2 += diversity_indices_adonis ? " --diversity_indices_adonis '"+ diversity_indices_adonis.join(",") +"' --qiime_adonis_formula $params.qiime_adonis_formula" : "" - qiime2 += ancom ? " --ancom '"+ ancom.join(",") +"'" : "" + + + def picrust = picrust_pathways ? "--picrust_pathways $picrust_pathways" : "" + + // make named R list (comma separated) + // all non-boolean or non-numeric values must be encumbered by single quotes (')! + // all elements must have a value, i.e. booleans also need to be set to TRUE + def params_list_named = [ + meta.single_end ? "flag_single_end=TRUE" : "", + params.skip_fastqc || params.skip_multiqc ? + "flag_skip_fastqc=TRUE" : + "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'", + params.skip_cutadapt ? "flag_skip_cutadapt=TRUE" : + params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,ca_sum_path='$ca_summary'" : + "ca_sum_path='$ca_summary'", + find_truncation_values ? "trunc_qmin=$params.trunc_qmin,trunc_rmin=$params.trunc_rmin" : "", + "trunclenf='$params.trunclenf'", + "trunclenr='$params.trunclenr'", + "max_ee=$params.max_ee", + params.skip_dada_quality ? "--skip_dada_quality" : + meta.single_end ? 
"dada_qc_f_path='$dada_qual_stats',dada_pp_qc_f_path='$dada_pp_qual_stats'" : + "dada_qc_f_path='FW_qual_stats.svg',dada_qc_r_path='RV_qual_stats.svg',dada_pp_qc_f_path='FW_preprocessed_qual_stats.svg',dada_pp_qc_r_path='RV_preprocessed_qual_stats.svg'", + "dada_filtntrim_args='$dada_filtntrim_args'", + "dada_sample_inference='$params.sample_inference'", + meta.run.size() == 1 && meta.single_end ? + "dada_err_path='$dada_err_svgs',dada_err_run='"+meta.run+"'" : + "dada_err_path='"+dada_err_svgs.join(',')+"',dada_err_run='"+meta.run.join(',')+"'", + "asv_table_path='$dada_asv_table'", + "path_asv_fa='$dada_asv_fa'", + "path_dada2_tab='$dada_tab'", + "dada_stats_path='$dada_stats'", + params.skip_barrnap ? "skip_barrnap=TRUE" : "path_barrnap_sum='$barrnap_summary'", + filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "filter_ssu=none", + filter_len_asv_stats ? "filter_len_asv='$filter_len_asv_stats',filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "", + params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0", + params.max_len_asv ? "max_len_asv=$params.max_len_asv" : "max_len_asv=0", + filter_codons_stats ? "filter_codons='$filter_codons_stats',stop_codons='$params.stop_codons'" : "", + itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary','cut_its=$params.cut_its" : "cut_its='none'", + !dada2_tax ? "" : + params.dada_ref_tax_custom ? "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',ref_tax_user=TRUE" : + "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'", + cut_dada_ref_taxonomy ? "cut_dada_ref_taxonomy='$cut_dada_ref_taxonomy'" : "", + sintax_tax ? "flag_sintax_taxonomy=TRUE,sintax_taxonomy='$sintax_tax',sintax_ref_tax_title='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "", + pplace_tax ? 
"flag_pplace_taxonomy=TRUE,pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", + qiime2_tax ? "flag_qiime2_taxonomy=TRUE,qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "", + run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "flag_skip_qiime=TRUE", + filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", + barplot ? "" : "flag_skip_barplot=TRUE", + barplot && params.metadata_category_barplot ? "metadata_category_barplot='$params.metadata_category_barplot'" : "", + abundance_tables ? "" : "flag_skip_abundance_tables=TRUE", + alpha_rarefaction ? "" : "flag_skip_alpha_rarefaction=TRUE", + diversity_indices ? "diversity_indices_depth='$diversity_indices',diversity_indices_beta='"+ diversity_indices_beta.join(",") +"'" : "flag_skip_diversity_indices=TRUE", + diversity_indices_adonis ? "diversity_indices_adonis='"+ diversity_indices_adonis.join(",") +"',qiime_adonis_formula='$params.qiime_adonis_formula'" : "", + ancom ? 
"ancom='"+ ancom.join(",") +"'" : "", + ] + // groovy list to R named list string; findAll removes empty entries + params_list_named_string = params_list_named.findAll().join(',').trim() """ - generate_report.R --report $report_template \\ - --output "summary_report.html" \\ - $fastqc \\ - $cutadapt \\ - $dada_quality \\ - --asv_table_path $dada_asv_table \\ - --path_asv_fa $dada_asv_fa \\ - --path_dada2_tab $dada_tab \\ - --dada_stats_path $dada_stats \\ - --dada_filtntrim_args $dada_filtntrim_args \\ - --dada_sample_inference $params.sample_inference \\ - $dada_err \\ - $barrnap \\ - $single_end \\ - $find_truncation \\ - --trunclenf $params.trunclenf \\ - --trunclenr $params.trunclenr \\ - --max_ee $params.max_ee \\ - $filter_len_asv \\ - $filter_codons \\ - $itsx_cutasv \\ - $dada2_taxonomy \\ - $sintax_taxonomy \\ - $pplace_taxonomy \\ - $qiime2_taxonomy \\ - $qiime2 \\ - $picrust + #!/usr/bin/env Rscript + library(rmarkdown) + + # Work around https://github.com/rstudio/rmarkdown/issues/1508 + # If the symbolic link is not replaced by a physical file + # output- and temporary files will be written to the original directory. 
+ file.copy("./${report_template}", "./template.Rmd", overwrite = TRUE) + + rmarkdown::render("template.Rmd", output_file = "summary_report.html", params = list($params_list_named_string), envir = new.env()) - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R: \$(R --version 2>&1 | sed -n 1p | sed 's/R version //' | sed 's/ (.*//') - rmarkdown: \$(Rscript -e "cat(paste(packageVersion('rmarkdown'), collapse='.'))") - knitr: \$(Rscript -e "cat(paste(packageVersion('knitr'), collapse='.'))") - END_VERSIONS + writeLines(c("\\"${task.process}\\":", + paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")), + paste0(" rmarkdown: ", packageVersion("rmarkdown")), + paste0(" knitr: ", packageVersion("knitr")) ), + "versions.yml") + #writeLines(c("doesnt","work","yet"),"versions.yml") """ } From cd31b844b70e752a54f23fa5f5a2becc528ff018 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 7 Jul 2023 15:52:46 +0200 Subject: [PATCH 075/230] fix params list --- modules/local/summary_report.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 67f947d1..b0a90e56 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -93,7 +93,7 @@ process SUMMARY_REPORT { "path_dada2_tab='$dada_tab'", "dada_stats_path='$dada_stats'", params.skip_barrnap ? "skip_barrnap=TRUE" : "path_barrnap_sum='$barrnap_summary'", - filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "filter_ssu=none", + filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "filter_ssu='none'", filter_len_asv_stats ? "filter_len_asv='$filter_len_asv_stats',filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "", params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0", params.max_len_asv ? 
"max_len_asv=$params.max_len_asv" : "max_len_asv=0", From b7ac50af8d688e7fab645237bf9efd3a4b030518 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 7 Jul 2023 16:14:49 +0200 Subject: [PATCH 076/230] fix params list 2 --- assets/report_template.Rmd | 4 ++-- modules/local/summary_report.nf | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 1c1fa024..daa9a4f4 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -35,7 +35,7 @@ params: trunclenf: "" trunclenr: "" max_ee: "" - trunc_qmin: "" + trunc_qmin: FALSE trunc_rmin: "" dada_sample_inference: "" filter_ssu: "" @@ -168,7 +168,7 @@ cat("\n\nCutadapt results can be found in folder [cutadapt](../cutadapt).") cat("Additional quality filtering can improve sequence recovery. ", "Often it is advised trimming the last few nucleotides to avoid less well-controlled errors that can arise there. ") -if (params$trunc_qmin != -1) { +if (params$trunc_qmin) { f_and_tr_args <- readLines(params$dada_filtntrim_args) trunc_len <- strsplit(gsub(".*truncLen = c\\((.+)\\),maxN.*", "\\1", f_and_tr_args), ", ") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index b0a90e56..7d0a3d36 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -80,7 +80,7 @@ process SUMMARY_REPORT { "trunclenf='$params.trunclenf'", "trunclenr='$params.trunclenr'", "max_ee=$params.max_ee", - params.skip_dada_quality ? "--skip_dada_quality" : + params.skip_dada_quality ? "flag_skip_dada_quality=TRUE" : meta.single_end ? 
"dada_qc_f_path='$dada_qual_stats',dada_pp_qc_f_path='$dada_pp_qual_stats'" : "dada_qc_f_path='FW_qual_stats.svg',dada_qc_r_path='RV_qual_stats.svg',dada_pp_qc_f_path='FW_preprocessed_qual_stats.svg',dada_pp_qc_r_path='RV_preprocessed_qual_stats.svg'", "dada_filtntrim_args='$dada_filtntrim_args'", @@ -92,15 +92,15 @@ process SUMMARY_REPORT { "path_asv_fa='$dada_asv_fa'", "path_dada2_tab='$dada_tab'", "dada_stats_path='$dada_stats'", - params.skip_barrnap ? "skip_barrnap=TRUE" : "path_barrnap_sum='$barrnap_summary'", + params.skip_barrnap ? "flag_skip_barrnap=TRUE" : "path_barrnap_sum='$barrnap_summary'", filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "filter_ssu='none'", filter_len_asv_stats ? "filter_len_asv='$filter_len_asv_stats',filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "", params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0", params.max_len_asv ? "max_len_asv=$params.max_len_asv" : "max_len_asv=0", filter_codons_stats ? "filter_codons='$filter_codons_stats',stop_codons='$params.stop_codons'" : "", - itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary','cut_its=$params.cut_its" : "cut_its='none'", + itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary','cut_its='$params.cut_its'" : "cut_its='none'", !dada2_tax ? "" : - params.dada_ref_tax_custom ? "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',ref_tax_user=TRUE" : + params.dada_ref_tax_custom ? "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',flag_ref_tax_user=TRUE" : "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'", cut_dada_ref_taxonomy ? "cut_dada_ref_taxonomy='$cut_dada_ref_taxonomy'" : "", sintax_tax ? 
"flag_sintax_taxonomy=TRUE,sintax_taxonomy='$sintax_tax',sintax_ref_tax_title='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "", From 6d047ac2ac33bbbc44c5f2d219093cc2c3e2df3d Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 7 Jul 2023 16:58:42 +0200 Subject: [PATCH 077/230] fix params list 3 --- assets/report_template.Rmd | 26 +++++++++++++------------- modules/local/summary_report.nf | 3 +-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index daa9a4f4..be2a4541 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -51,7 +51,7 @@ params: qiime2_filtertaxa: "" val_used_taxonomy: "" metadata_category_barplot: FALSE - qiime_adonis_formula: "" + qiime_adonis_formula: FALSE # file paths mqc_plot: "" @@ -206,7 +206,11 @@ if (params$flag_single_end) { ``` ```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='default'} -knitr::include_graphics(c(params$dada_qc_f_path, params$dada_qc_r_path)) +if (params$flag_single_end) { + knitr::include_graphics(params$dada_qc_f_path) +} else { + knitr::include_graphics(c(params$dada_qc_f_path, params$dada_qc_r_path)) +} ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} @@ -218,7 +222,11 @@ if (params$flag_single_end) { ``` ```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='default'} -knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) +if (params$flag_single_end) { + knitr::include_graphics(params$dada_pp_qc_f_path) +} else { + knitr::include_graphics(c(params$dada_pp_qc_f_path, params$dada_pp_qc_r_path)) +} ``` ```{r, eval = !params$flag_skip_dada_quality, results='asis'} @@ -1088,11 +1096,7 @@ for (folder in diversity_indices_beta) { } ``` -```{r, results='asis'} -flag_qiime_adonis_formula <- isTRUE(params$qiime_adonis_formula != "") -``` - -```{r, eval = params$flag_qiime_adonis_formula, 
results='asis'} +```{r, eval = params$qiime_adonis_formula, results='asis'} cat("_ADONIS test for beta diversity_\n\n") cat("Permutational multivariate analysis of variance using distance matrices (adonis) determines whether groups of samples are significantly different from one another. @@ -1110,11 +1114,7 @@ for (folder in diversity_indices_adonis) { } ``` -```{r, results='asis'} -flag_ancom <- isTRUE(params$ancom != "") -``` - -```{r, eval = flag_ancom, results='asis'} +```{r, eval = params$ancom, results='asis'} cat("## ANCOM\n\n") cat("Analysis of Composition of Microbiomes (ANCOM) is applied to identify features that are differentially abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 7d0a3d36..3a30404d 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -98,7 +98,7 @@ process SUMMARY_REPORT { params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0", params.max_len_asv ? "max_len_asv=$params.max_len_asv" : "max_len_asv=0", filter_codons_stats ? "filter_codons='$filter_codons_stats',stop_codons='$params.stop_codons'" : "", - itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary','cut_its='$params.cut_its'" : "cut_its='none'", + itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary',cut_its='$params.cut_its'" : "cut_its='none'", !dada2_tax ? "" : params.dada_ref_tax_custom ? 
"flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',flag_ref_tax_user=TRUE" : "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'", @@ -134,6 +134,5 @@ process SUMMARY_REPORT { paste0(" rmarkdown: ", packageVersion("rmarkdown")), paste0(" knitr: ", packageVersion("knitr")) ), "versions.yml") - #writeLines(c("doesnt","work","yet"),"versions.yml") """ } From 50e4607b35990c0c43f91651cbe77bb581ecfc66 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 7 Jul 2023 17:09:02 +0200 Subject: [PATCH 078/230] fix non existent adonis channel --- subworkflows/local/qiime2_diversity.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_diversity.nf b/subworkflows/local/qiime2_diversity.nf index b305168f..02f0d91e 100644 --- a/subworkflows/local/qiime2_diversity.nf +++ b/subworkflows/local/qiime2_diversity.nf @@ -77,5 +77,5 @@ workflow QIIME2_DIVERSITY { alpha = !skip_diversity_indices ? QIIME2_DIVERSITY_ALPHA.out.alpha : [] beta = !skip_diversity_indices ? QIIME2_DIVERSITY_BETA.out.beta : [] betaord = !skip_diversity_indices ? QIIME2_DIVERSITY_BETAORD.out.beta : [] - adonis = params.qiime_adonis_formula ? QIIME2_DIVERSITY_ADONIS.out.html : [] + adonis = !skip_diversity_indices && params.qiime_adonis_formula ? 
QIIME2_DIVERSITY_ADONIS.out.html : [] } From df3298a35b3d829b5736845e4607d2dd8cc821ab Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 10 Jul 2023 16:24:08 +0200 Subject: [PATCH 079/230] remove all flag_skip_ params --- assets/report_template.Rmd | 140 ++++++++++++-------------------- modules/local/summary_report.nf | 34 ++++---- 2 files changed, 68 insertions(+), 106 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index be2a4541..096940a0 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -14,23 +14,12 @@ output: #bibliography: ./references.bibtex params: # flags and arguments - flag_skip_fastqc: FALSE - flag_skip_cutadapt: FALSE - flag_skip_dada_quality: FALSE - flag_skip_barrnap: FALSE - #flag_skip_taxonomy: FALSE flag_retain_untrimmed: TRUE flag_ref_tax_user: FALSE flag_single_end: FALSE - flag_dada2_taxonomy: FALSE - flag_qiime2_taxonomy: FALSE - flag_sintax_taxonomy: FALSE - flag_pplace_taxonomy: FALSE - flag_skip_qiime: FALSE - flag_skip_barplot: FALSE - flag_skip_abundance_tables: FALSE - flag_skip_alpha_rarefaction: FALSE - flag_skip_diversity_indices: FALSE + barplot: FALSE + abundance_tables: FALSE + alpha_rarefaction: FALSE ancom: FALSE trunclenf: "" trunclenr: "" @@ -38,10 +27,10 @@ params: trunc_qmin: FALSE trunc_rmin: "" dada_sample_inference: "" - filter_ssu: "" + filter_ssu: FALSE min_len_asv: "" max_len_asv: "" - cut_its: "" + cut_its: FALSE dada2_ref_tax_title: "" qiime2_ref_tax_title: "" sintax_ref_tax_title: "" @@ -49,15 +38,15 @@ params: min_frequency: "" min_samples: "" qiime2_filtertaxa: "" - val_used_taxonomy: "" + val_used_taxonomy: FALSE metadata_category_barplot: FALSE qiime_adonis_formula: FALSE # file paths - mqc_plot: "" - ca_sum_path: "" + mqc_plot: FALSE + ca_sum_path: FALSE dada_filtntrim_args: "" - dada_qc_f_path: "" + dada_qc_f_path: FALSE dada_qc_r_path: "" dada_pp_qc_f_path: "" dada_pp_qc_r_path: "" @@ -67,25 +56,25 @@ params: path_asv_fa: "" path_dada2_tab: "" 
dada_stats_path: "" - path_barrnap_sum: "" + path_barrnap_sum: FALSE filter_ssu_stats: "" filter_ssu_asv: "" - filter_len_asv: "" + filter_len_asv: FALSE filter_len_asv_len_orig: "" - filter_codons: "" + filter_codons: FALSE stop_codons: "" itsx_cutasv_summary: "" - cut_dada_ref_taxonomy: "" - dada2_taxonomy: "" - sintax_taxonomy: "" - pplace_taxonomy: "" + cut_dada_ref_taxonomy: FALSE + dada2_taxonomy: FALSE + sintax_taxonomy: FALSE + pplace_taxonomy: FALSE pplace_heattree: "" - qiime2_taxonomy: "" - filter_stats_tsv: "" + qiime2_taxonomy: FALSE + filter_stats_tsv: FALSE diversity_indices_depth: "" - diversity_indices_beta: "" + diversity_indices_beta: FALSE diversity_indices_adonis: "" - picrust_pathways: "" + picrust_pathways: FALSE --- @@ -104,7 +93,7 @@ supporting denoising of any amplicon and supports a variety of taxonomic databas # Preprocessing -```{r, eval = !params$flag_skip_fastqc, results='asis'} +```{r, eval = !isFALSE(params$mqc_plot), results='asis'} mqc_rep_path <- paste0("../multiqc/multiqc_report.html") cat("## FastQC\n") @@ -117,11 +106,11 @@ cat("The sequence quality was checked using FastQC and resulting data was ", "MultiQC report, which is found [here](", mqc_rep_path, ").", sep = "") ``` -```{r, eval = !params$flag_skip_fastqc, out.width='100%', dpi=1200, fig.align='center'} +```{r, eval = !isFALSE(params$mqc_plot), out.width='100%', dpi=1200, fig.align='center'} knitr::include_graphics(params$mqc_plot) ``` -```{r, eval = !params$flag_skip_cutadapt, results='asis'} +```{r, eval = !isFALSE(params$ca_sum_path), results='asis'} cat("## Primer removal with Cutadapt\n") cat("Cutadapt is trimming primer sequences from sequencing reads. 
", "Primer sequences are non-biological sequences that often introduce ", @@ -195,7 +184,7 @@ cat("Reads with more than", params$max_ee,"expected errors were discarded.", "column 'filtered'.", sep = " ") ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis'} +```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'} cat ("**Quality profiles:**\n\n") if (params$flag_single_end) { @@ -205,7 +194,7 @@ if (params$flag_single_end) { } ``` -```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='default'} +```{r, eval = !isFALSE(params$dada_qc_f_path), out.width="49%", fig.show='hold', fig.align='default'} if (params$flag_single_end) { knitr::include_graphics(params$dada_qc_f_path) } else { @@ -213,7 +202,7 @@ if (params$flag_single_end) { } ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis'} +```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'} if (params$flag_single_end) { cat("Read quality stats for preprocessed data:") } else { @@ -221,7 +210,7 @@ if (params$flag_single_end) { } ``` -```{r, eval = !params$flag_skip_dada_quality, out.width="49%", fig.show='hold', fig.align='default'} +```{r, eval = !isFALSE(params$dada_qc_f_path), out.width="49%", fig.show='hold', fig.align='default'} if (params$flag_single_end) { knitr::include_graphics(params$dada_pp_qc_f_path) } else { @@ -229,7 +218,7 @@ if (params$flag_single_end) { } ``` -```{r, eval = !params$flag_skip_dada_quality, results='asis'} +```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'} cat("Overall read quality profiles as heat map of the frequency of each quality score at each base position. ", "The mean quality score at each position is shown by the green line, and the quartiles of the quality score ", "distribution by the orange lines. 
The red line shows the scaled proportion of reads that extend to at least ", @@ -429,14 +418,14 @@ if ( params$dada_sample_inference == "independent" ) { ``` ```{r, results='asis'} -flag_any_filtering <- !params$flag_skip_barrnap || isTRUE(params$filter_len_asv != "") || isTRUE(params$filter_codons != "") +flag_any_filtering <- !isFALSE(params$path_barrnap_sum) || !isFALSE(params$filter_len_asv) || !isFALSE(params$filter_codons) ``` ```{r, eval = flag_any_filtering, results='asis'} cat("# Filtering of ASVs\n") ``` -```{r, eval = !params$flag_skip_barrnap, results='asis'} +```{r, eval = !isFALSE(params$path_barrnap_sum), results='asis'} cat("## rRNA detection\n") cat("Barrnap classifies the ASVs into the origin domain (including mitochondiral origin).\n\n", sep = "") @@ -489,11 +478,7 @@ invisible(dev.off()) cat("\n\nrRNA filter results can be found in folder [barrnap](../barrnap).") ``` -```{r, results='asis'} -flag_filter_ssu <- !params$flag_skip_barrnap && isTRUE(params$filter_ssu != "none") -``` - -```{r, eval = flag_filter_ssu, results='asis'} +```{r, eval = !isFALSE(params$path_barrnap_sum) && !isFALSE(params$filter_ssu), results='asis'} cat("\n\nASVs were filtered for (",params$filter_ssu,") using the above classification.", "The following table shows read counts for each sample before and after filtering:\n\n", sep = "") @@ -521,11 +506,7 @@ cat("In average", round(filter_ssu_stats_avg_removed,2), "% reads were removed, cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-round( filter_ssu_asv_filtered/n_asv*100 ,2),"%), from",n_asv,"to",filter_ssu_asv_filtered," ASVs.") ``` -```{r, results='asis'} -flag_filter_len_asv <- isTRUE(params$filter_len_asv != "") -``` - -```{r, eval = flag_filter_len_asv, results='asis'} +```{r, eval = !isFALSE(params$filter_len_asv), results='asis'} cat("## Sequence length\n") cat("A length filter was used to reduce potential contamination after ASV computation.", @@ -594,11 +575,7 @@ cat("The number of 
ASVs was reduced by",sum(filter_len_profile$Counts)-sum(filte cat("\n\nLength filter results can be found in folder [asv_length_filter](../asv_length_filter).") ``` -```{r, results='asis'} -flag_filter_codons <- isTRUE(params$filter_codons != "") -``` - -```{r, eval = flag_filter_codons, results='asis'} +```{r, eval = !isFALSE(params$filter_codons), results='asis'} cat("## Codon usage\n") cat("Amplicons of coding regions are expected to be free of stop codons and consist of condon tripletts.", @@ -623,7 +600,7 @@ cat("\n\nCodon usage filter results can be found in folder [codon_filter](../cod ```{r, results='asis'} # Check if any taxonomic classification is available -any_taxonomy <- params$flag_dada2_taxonomy || params$flag_qiime2_taxonomy || params$flag_sintax_taxonomy || params$flag_pplace_taxonomy +any_taxonomy <- !isFALSE(params$dada2_taxonomy) || !isFALSE(params$qiime2_taxonomy) || !isFALSE(params$sintax_taxonomy) || !isFALSE(params$pplace_taxonomy) ``` ```{r, eval = any_taxonomy, results='asis'} @@ -631,12 +608,7 @@ any_taxonomy <- params$flag_dada2_taxonomy || params$flag_qiime2_taxonomy || par cat("# Taxonomic Classification\n") ``` -```{r, results='asis'} -# Check if ITSX was used -flag_itsx_cutasv <- isTRUE(params$cut_its != "none") -``` - -```{r, eval = flag_itsx_cutasv, results='asis'} +```{r, eval = !isFALSE(params$cut_its), results='asis'} cat("## ITS region\n") cat("The ITS region was extracted from each ASV sequence using ITSx.", "Taxonomic classification should have improved performance based on extracted ITS sequence.\n") @@ -688,7 +660,7 @@ invisible(dev.off()) cat("\n\nITSx results can be found in folder [itsx](../itsx).") ``` -```{r, eval = params$flag_dada2_taxonomy, results='asis'} +```{r, eval = !isFALSE(params$dada2_taxonomy), results='asis'} cat("## DADA2\n") # indicate reference taxonomy @@ -701,7 +673,7 @@ if (!params$flag_ref_tax_user) { } # mention if taxonomy was cut by cutadapt -if ( isTRUE(params$cut_dada_ref_taxonomy != "") ) { 
+if ( !isFALSE(params$cut_dada_ref_taxonomy) ) { cut_dada_ref_taxonomy <- readLines(params$cut_dada_ref_taxonomy) for (line in cut_dada_ref_taxonomy){ if (grepl("Total reads processed:", line)) { @@ -781,7 +753,7 @@ invisible(dev.off()) cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in files 'ASV_tax_*.tsv'.") ``` -```{r, eval = params$flag_qiime2_taxonomy, results='asis'} +```{r, eval = !isFALSE(params$qiime2_taxonomy), results='asis'} # Header cat("## QIIME2\n") @@ -847,7 +819,7 @@ invisible(dev.off()) cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](../qiime2/taxonomy).") ``` -```{r, eval = params$flag_sintax_taxonomy, results='asis'} +```{r, eval = !isFALSE(params$sintax_taxonomy), results='asis'} # Header cat("## SINTAX\n") @@ -909,7 +881,7 @@ invisible(dev.off()) cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax).") ``` -```{r, eval = params$flag_pplace_taxonomy, results='asis'} +```{r, eval = !isFALSE(params$pplace_taxonomy), results='asis'} # Header cat("## Phylogenetic Placement\n", "Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations. The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons. It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. 
", @@ -966,26 +938,22 @@ invisible(dev.off()) cat("\n\nHeattree of the phylogenetic placement:") ``` -```{r, eval = params$flag_pplace_taxonomy, out.width="100%", fig.show='hold', fig.align='default'} +```{r, eval = !isFALSE(params$pplace_taxonomy), out.width="100%", fig.show='hold', fig.align='default'} knitr::include_graphics(c(params$pplace_heattree)) ``` -```{r, eval = params$flag_pplace_taxonomy, results='asis'} +```{r, eval = !isFALSE(params$pplace_taxonomy), results='asis'} cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [pplace](../pplace) in file '*.taxonomy.per_query_unique.tsv'.") ``` -```{r, eval = !params$flag_skip_qiime, results='asis'} +```{r, eval = !isFALSE(params$val_used_taxonomy), results='asis'} # Header cat("# Downstream analysis with QIIME2\n", "Files that were input to QIIME2 can be found in folder [qiime2/input/](../qiime2/input/).", "Results of taxonomic classification of",params$val_used_taxonomy,"was used in all following analysis, see in the above sections.") ``` -```{r, results='asis'} -flag_filter_stats_tsv <- isTRUE(params$filter_stats_tsv != "") -``` - -```{r, eval = flag_filter_stats_tsv, results='asis'} +```{r, eval = !isFALSE(params$filter_stats_tsv), results='asis'} cat("## ASV filtering\n", "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA, for 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. 
") if ( params$exclude_taxa != "none" ) { @@ -1024,7 +992,7 @@ datatable(filter_stats_tsv, options = list( cat("\n\nTables with read count numbers and filtered abundance tables are in folder [qiime2/abundance_tables](../qiime2/abundance_tables).") ``` -```{r, eval = !params$flag_skip_abundance_tables, results='asis'} +```{r, eval = !isFALSE(params$abundance_tables), results='asis'} cat("## Abundance tables\n", "The abundance tables are the final data for further downstream analysis and visualisations. The tables are based on the computed ASVs and taxonomic classification, but after removal of unwanted taxa. ", "Folder [qiime2/abundance_tables](../qiime2/abundance_tables) contains tap-separated files (.tsv) that can be opened by any spreadsheet software.") @@ -1034,14 +1002,14 @@ cat("\n\n## Relative abundance tables\n", "Folder [qiime2/rel_abundance_tables](../qiime2/rel_abundance_tables) contains tap-separated files (.tsv) that can be opened by any spreadsheet software.") ``` -```{r, eval = !params$flag_skip_barplot, results='asis'} +```{r, eval = !isFALSE(params$barplot), results='asis'} cat("## Barplot\n", "Interactive abundance plot that aids exploratory browsing the discovered taxa and their abundance", "in samples and allows sorting for associated meta data.", "Folder [qiime2/barplot](../qiime2/barplot) contains barplots, click [qiime2/barplot/index.html](../qiime2/barplot/index.html) to open it in your web browser.") ``` -```{r, eval = params$metadata_category_barplot, results='asis'} +```{r, eval = !isFALSE(params$metadata_category_barplot), results='asis'} cat("\n\nAdditionally, barplots with average relative abundance values were produced for", params$metadata_category_barplot,"(comma separated if several) in [qiime2/barplot_average](../qiime2/barplot_average) in separate folders following the scheme 'barplot_{treatment}':\n") @@ -1052,7 +1020,7 @@ for (category in metadata_category_barplot) { } ``` -```{r, eval = !params$flag_skip_alpha_rarefaction, 
results='asis'} +```{r, eval = !isFALSE(params$alpha_rarefaction), results='asis'} cat("## Alpha diversity rarefaction curves\n", "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not becomes horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ") # warning if dada_sample_inference is independent, because alpha diversities are not expected to be accurate! @@ -1062,7 +1030,7 @@ if ( params$dada_sample_inference == "independent") { cat("Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click [qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser.") ``` -```{r, eval = !params$flag_skip_diversity_indices, results='asis'} +```{r, eval = !isFALSE(params$diversity_indices_beta), results='asis'} diversity_indices_depth <- readLines(params$diversity_indices_depth) cat("## Diversity analysis\n", @@ -1096,7 +1064,7 @@ for (folder in diversity_indices_beta) { } ``` -```{r, eval = params$qiime_adonis_formula, results='asis'} +```{r, eval = !isFALSE(params$qiime_adonis_formula), results='asis'} cat("_ADONIS test for beta diversity_\n\n") cat("Permutational multivariate analysis of variance using distance matrices (adonis) determines whether groups of samples are significantly different from one another. @@ -1114,7 +1082,7 @@ for (folder in diversity_indices_adonis) { } ``` -```{r, eval = params$ancom, results='asis'} +```{r, eval = !isFALSE(params$ancom), results='asis'} cat("## ANCOM\n\n") cat("Analysis of Composition of Microbiomes (ANCOM) is applied to identify features that are differentially abundant across sample groups. 
A key assumption made by ANCOM is that few taxa (less than about 25%) @@ -1131,11 +1099,7 @@ for (folder in ancom) { } ``` -```{r, results='asis'} -flag_picrust_pathways <- isTRUE(params$picrust_pathways != "") -``` - -```{r, eval = flag_picrust_pathways, results='asis'} +```{r, eval = !isFALSE(params$picrust_pathways), results='asis'} cat("## PICRUSt2\n", "PICRUSt2 (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences.", "Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology predictions were made for each sample.", diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 3a30404d..817eb11c 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -70,17 +70,15 @@ process SUMMARY_REPORT { // all elements must have a value, i.e. booleans also need to be set to TRUE def params_list_named = [ meta.single_end ? "flag_single_end=TRUE" : "", - params.skip_fastqc || params.skip_multiqc ? - "flag_skip_fastqc=TRUE" : - "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'", - params.skip_cutadapt ? "flag_skip_cutadapt=TRUE" : + params.skip_fastqc || params.skip_multiqc ? "" : "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'", + params.skip_cutadapt ? "" : params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,ca_sum_path='$ca_summary'" : "ca_sum_path='$ca_summary'", find_truncation_values ? "trunc_qmin=$params.trunc_qmin,trunc_rmin=$params.trunc_rmin" : "", "trunclenf='$params.trunclenf'", "trunclenr='$params.trunclenr'", "max_ee=$params.max_ee", - params.skip_dada_quality ? "flag_skip_dada_quality=TRUE" : + params.skip_dada_quality ? "" : meta.single_end ? 
"dada_qc_f_path='$dada_qual_stats',dada_pp_qc_f_path='$dada_pp_qual_stats'" : "dada_qc_f_path='FW_qual_stats.svg',dada_qc_r_path='RV_qual_stats.svg',dada_pp_qc_f_path='FW_preprocessed_qual_stats.svg',dada_pp_qc_r_path='RV_preprocessed_qual_stats.svg'", "dada_filtntrim_args='$dada_filtntrim_args'", @@ -92,27 +90,27 @@ process SUMMARY_REPORT { "path_asv_fa='$dada_asv_fa'", "path_dada2_tab='$dada_tab'", "dada_stats_path='$dada_stats'", - params.skip_barrnap ? "flag_skip_barrnap=TRUE" : "path_barrnap_sum='$barrnap_summary'", - filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "filter_ssu='none'", + params.skip_barrnap ? "" : "path_barrnap_sum='$barrnap_summary'", + filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "", filter_len_asv_stats ? "filter_len_asv='$filter_len_asv_stats',filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "", params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0", params.max_len_asv ? "max_len_asv=$params.max_len_asv" : "max_len_asv=0", filter_codons_stats ? "filter_codons='$filter_codons_stats',stop_codons='$params.stop_codons'" : "", - itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary',cut_its='$params.cut_its'" : "cut_its='none'", + itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary',cut_its='$params.cut_its'" : "", !dada2_tax ? "" : - params.dada_ref_tax_custom ? "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',flag_ref_tax_user=TRUE" : - "flag_dada2_taxonomy=TRUE,dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'", + params.dada_ref_tax_custom ? "dada2_taxonomy='$dada2_tax',flag_ref_tax_user=TRUE" : + "dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'", cut_dada_ref_taxonomy ? 
"cut_dada_ref_taxonomy='$cut_dada_ref_taxonomy'" : "", - sintax_tax ? "flag_sintax_taxonomy=TRUE,sintax_taxonomy='$sintax_tax',sintax_ref_tax_title='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "", - pplace_tax ? "flag_pplace_taxonomy=TRUE,pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax ? "flag_qiime2_taxonomy=TRUE,qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "", - run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "flag_skip_qiime=TRUE", + sintax_tax ? "sintax_taxonomy='$sintax_tax',sintax_ref_tax_title='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "", + pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", + qiime2_tax ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "", + run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", - barplot ? "" : "flag_skip_barplot=TRUE", + barplot ? "barplot=TRUE" : "", barplot && params.metadata_category_barplot ? "metadata_category_barplot='$params.metadata_category_barplot'" : "", - abundance_tables ? "" : "flag_skip_abundance_tables=TRUE", - alpha_rarefaction ? "" : "flag_skip_alpha_rarefaction=TRUE", - diversity_indices ? "diversity_indices_depth='$diversity_indices',diversity_indices_beta='"+ diversity_indices_beta.join(",") +"'" : "flag_skip_diversity_indices=TRUE", + abundance_tables ? "abundance_tables=TRUE" : "", + alpha_rarefaction ? "alpha_rarefaction=TRUE" : "", + diversity_indices ? "diversity_indices_depth='$diversity_indices',diversity_indices_beta='"+ diversity_indices_beta.join(",") +"'" : "", diversity_indices_adonis ? 
"diversity_indices_adonis='"+ diversity_indices_adonis.join(",") +"',qiime_adonis_formula='$params.qiime_adonis_formula'" : "", ancom ? "ancom='"+ ancom.join(",") +"'" : "", ] From 44ffe481eedbd21b3713246576f5949986301c40 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 11 Jul 2023 13:26:58 +0200 Subject: [PATCH 080/230] make input optional --- assets/report_template.Rmd | 128 +++++++++++++++++--------------- modules/local/summary_report.nf | 33 ++++---- workflows/ampliseq.nf | 20 ++--- 3 files changed, 92 insertions(+), 89 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 096940a0..b6f88cd6 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -45,22 +45,22 @@ params: # file paths mqc_plot: FALSE ca_sum_path: FALSE - dada_filtntrim_args: "" + dada_filtntrim_args: FALSE dada_qc_f_path: FALSE dada_qc_r_path: "" dada_pp_qc_f_path: "" dada_pp_qc_r_path: "" - dada_err_path: "" + dada_err_path: FALSE dada_err_run: "" - asv_table_path: "" - path_asv_fa: "" - path_dada2_tab: "" - dada_stats_path: "" + asv_table_path: FALSE + path_asv_fa: FALSE + path_dada2_tab: FALSE + dada_stats_path: FALSE path_barrnap_sum: FALSE filter_ssu_stats: "" filter_ssu_asv: "" filter_len_asv: FALSE - filter_len_asv_len_orig: "" + filter_len_asv_len_orig: FALSE filter_codons: FALSE stop_codons: "" itsx_cutasv_summary: "" @@ -91,7 +91,9 @@ knitr::opts_chunk$set(echo = FALSE) The bioinformatics analysis pipeline [nfcore/ampliseq](https://nf-co.re/ampliseq) is used for amplicon sequencing, supporting denoising of any amplicon and supports a variety of taxonomic databases for taxonomic assignment of 16S, ITS, CO1 and 18S amplicons. 
-# Preprocessing +```{r, eval = !isFALSE(params$mqc_plot) || !isFALSE(params$dada_filtntrim_args), results='asis'} +cat("# Preprocessing\n") +``` ```{r, eval = !isFALSE(params$mqc_plot), results='asis'} mqc_rep_path <- paste0("../multiqc/multiqc_report.html") @@ -151,9 +153,8 @@ datatable(cutadapt_summary, options = list( cat("\n\nCutadapt results can be found in folder [cutadapt](../cutadapt).") ``` -## Quality filtering using DADA2 - -```{r, results='asis'} +```{r, eval = !isFALSE(params$dada_filtntrim_args), results='asis'} +cat("## Quality filtering using DADA2\n\n") cat("Additional quality filtering can improve sequence recovery. ", "Often it is advised trimming the last few nucleotides to avoid less well-controlled errors that can arise there. ") @@ -225,20 +226,19 @@ cat("Overall read quality profiles as heat map of the frequency of each quality "that position. Original plots can be found [folder dada2/QC/](../dada2/QC/) with names that end in '_qual_stats.pdf'.") ``` -# ASV inference using DADA2 - -DADA2 performs fast and accurate sample inference from amplicon data with single-nucleotide resolution. -It infers exact amplicon sequence variants (ASVs) from amplicon data with fewer false positives than -many other methods while maintaining high sensitivity. - -DADA2 reduces sequence errors and dereplicates sequences by quality filtering, denoising, -read pair merging (for paired end Illumina reads only) and PCR chimera removal. - -## Error correction +```{r, eval = !isFALSE(params$dada_err_path) || !isFALSE(params$dada_stats_path) || !isFALSE(params$asv_table_path), results='asis'} +cat("# ASV inference using DADA2\n\n", + "DADA2 performs fast and accurate sample inference from amplicon data with single-nucleotide resolution. 
+ It infers exact amplicon sequence variants (ASVs) from amplicon data with fewer false positives than + many other methods while maintaining high sensitivity.\n\n", + "DADA2 reduces sequence errors and dereplicates sequences by quality filtering, denoising, + read pair merging (for paired end Illumina reads only) and PCR chimera removal.") +``` -Read error correction was performed using estimated error rates, visualized below. +```{r, eval = !isFALSE(params$dada_err_path), results='asis'} +cat("## Error correction\n\n", + "Read error correction was performed using estimated error rates, visualized below.\n") -```{r, results='asis'} # check if single run or multirun flag_multirun = length ( unlist( strsplit( params$dada_err_run,"," ) ) ) != 1 @@ -254,26 +254,26 @@ if ( flag_multirun && params$flag_single_end ) { } else if ( !flag_multirun && !params$flag_single_end ) { # paired end single run cat("Error rates for forward reads are at the left side and reverse reads are at the right side.") - } ``` -```{r, out.width="49%", fig.show='hold', fig.align='default'} +```{r, eval = !isFALSE(params$dada_err_path), out.width="49%", fig.show='hold', fig.align='default'} dada_err_path <- unlist( strsplit( params$dada_err_path,"," ) ) knitr::include_graphics(dada_err_path) ``` -Estimated error rates for each possible transition. The black line shows the estimated error rates after -convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal -definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates -(points), and the error rates should drop with increased quality. Original plots can be found in -[folder dada2/QC/](../dada2/QC/) with names that end in '.err.pdf'. - -## Read counts per sample +```{r, eval = !isFALSE(params$dada_err_path), results='asis'} +cat("Estimated error rates for each possible transition. 
The black line shows the estimated error rates after + convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal + definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates + (points), and the error rates should drop with increased quality. Original plots can be found in + [folder dada2/QC/](../dada2/QC/) with names that end in '.err.pdf'.") +``` -Tracking read numbers through DADA2 processing steps, for each sample. In the following table are read numbers after each processing stage. +```{r, eval = !isFALSE(params$dada_stats_path), results='asis'} +cat("## Read counts per sample\n\n", + "Tracking read numbers through DADA2 processing steps, for each sample. In the following table are read numbers after each processing stage.\n") -```{r, results='asis'} if ( params$flag_single_end ) { cat("Processing stages are: input - reads into DADA2, filtered - reads passed quality filtering, ", "denoised - reads after denoising, nonchim - reads in non-chimeric sequences (final ASVs)") @@ -291,12 +291,11 @@ datatable(dada_stats, options = list( scrollX = TRUE, scrollY = "300px", paging = FALSE)) -``` -Samples with unusual low reads numbers relative to the number of expected ASVs (e.g. 500 reads with 100 ASVs) -should be treated cautiously, because the abundance estimate will be very granular and might vary strongly between (theoretical) -replicates due to high impact of stochasticity. -```{r, results='asis'} +cat("Samples with unusual low reads numbers relative to the number of expected ASVs (e.g. 500 reads with 100 ASVs) + should be treated cautiously, because the abundance estimate will be very granular and might vary strongly between (theoretical) + replicates due to high impact of stochasticity. 
") + # Stacked barchart to num of reads # Calc exluded asvs and transform all cols to percent @@ -387,16 +386,15 @@ svg("stacked_barchart_of_reads.svg") plot_dada_stats_p_t invisible(dev.off()) -cat("\n\nBetween",min(dada_stats_p$analysis),"% and",max(dada_stats_p$analysis),"% reads per sample were retained for analysis within DADA2 steps.") +cat("\n\nBetween",min(dada_stats_p$analysis),"% and",max(dada_stats_p$analysis),"% reads per sample were retained for analysis within DADA2 steps.\n\n", + "The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. + Samples that are very different in lost reads (per stage) to the majority of samples must be compared with caution, because an unusual problem + (e.g. during nucleotide extraction, library preparation, or sequencing) could have occurred that might add bias to the analysis.") ``` -The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. -Samples that are very different in lost reads (per stage) to the majority of samples must be compared with caution, because an unusual problem -(e.g. during nucleotide extraction, library preparation, or sequencing) could have occurred that might add bias to the analysis. 
+```{r, eval = !isFALSE(params$asv_table_path), results='asis'} +cat("## Inferred ASVs\n\n") -## Inferred ASVs - -```{r, results='asis'} #import asv table asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t") n_asv <- length(asv_table$ASV_ID) @@ -439,8 +437,8 @@ barrnap_sum$result = colnames(barrnap_sum[,2:5])[apply(barrnap_sum[,2:5],1,which barrnap_sum$result = gsub("_eval", "", barrnap_sum$result) #import asv table -asv_table <- read.table(file = params$asv_table_path, header = TRUE, sep = "\t") -n_asv <- length(asv_table$ASV_ID) +asv_table <- readLines(params$path_asv_fa) +n_asv <- sum(grepl("^>", asv_table)) # calculate numbers n_classified <- length(barrnap_sum$result) @@ -506,7 +504,7 @@ cat("In average", round(filter_ssu_stats_avg_removed,2), "% reads were removed, cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-round( filter_ssu_asv_filtered/n_asv*100 ,2),"%), from",n_asv,"to",filter_ssu_asv_filtered," ASVs.") ``` -```{r, eval = !isFALSE(params$filter_len_asv), results='asis'} +```{r, eval = !isFALSE(params$filter_len_asv_len_orig), results='asis'} cat("## Sequence length\n") cat("A length filter was used to reduce potential contamination after ASV computation.", @@ -542,10 +540,26 @@ svg("asv_length_profile_before_length_filter.svg") plot_filter_len_profile invisible(dev.off()) -# Reads removed +cat("\n\n") +if ( params$min_len_asv != 0 && params$max_len_asv != 0 ) { + cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"or above",params$max_len_asv,"bp. ") +} else if ( params$min_len_asv != 0 ) { + cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"bp. ") +} else if ( params$max_len_asv != 0 ) { + cat("Filtering omitted all ASVs with length above",params$max_len_asv,"bp. 
") +} +``` +```{r, eval = !isFALSE(params$filter_len_asv), results='asis'} # import stats tsv filter_len_stats <- read.table(file = params$filter_len_asv, header = TRUE, sep = "\t") +# only if file not empty continue with reporting below +flag_filter_len_stats <- nrow(filter_len_stats) > 0 +``` + +```{r, eval = !isFALSE(params$filter_len_asv) && flag_filter_len_stats, results='asis'} +# Reads removed + # re-name & re-order columns colnames(filter_len_stats) <- gsub("lenfilter_","",colnames(filter_len_stats)) filter_len_stats <- filter_len_stats[, c("sample", "input", "output")] @@ -553,14 +567,6 @@ filter_len_stats$'retained%' <- round( filter_len_stats$output / filter_len_stat filter_len_stats_avg_removed <- 100-sum(filter_len_stats$'retained%')/length(filter_len_stats$'retained%') filter_len_stats_max_removed <- 100-min(filter_len_stats$'retained%') -cat("\n\n") -if ( params$min_len_asv != 0 && params$max_len_asv != 0 ) { - cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"or above",params$max_len_asv,"bp. ") -} else if ( params$min_len_asv != 0 ) { - cat("Filtering omitted all ASVs with length lower than",params$min_len_asv,"bp. ") -} else if ( params$max_len_asv != 0 ) { - cat("Filtering omitted all ASVs with length above",params$max_len_asv,"bp. ") -} cat("The following table shows (read) counts for each sample before and after filtering:") # Display table @@ -569,9 +575,11 @@ datatable(filter_len_stats, options = list( scrollY = "300px", paging = FALSE)) -cat("In average", filter_len_stats_avg_removed, "% reads were removed, but at most",filter_len_stats_max_removed,"% reads per sample. 
") -cat("The number of ASVs was reduced by",sum(filter_len_profile$Counts)-sum(filter_len_asv_filtered$Counts),"(",100-round( sum(filter_len_asv_filtered$Counts)/sum(filter_len_profile$Counts)*100 ,2),"%), from",sum(filter_len_profile$Counts),"to",sum(filter_len_asv_filtered$Counts)," ASVs.") +cat("In average", filter_len_stats_avg_removed, "% reads were removed, but at most",filter_len_stats_max_removed,"% reads per sample.") +``` +```{r, eval = !isFALSE(params$filter_len_asv_len_orig), results='asis'} +cat("The number of ASVs was reduced by",sum(filter_len_profile$Counts)-sum(filter_len_asv_filtered$Counts),"(",100-round( sum(filter_len_asv_filtered$Counts)/sum(filter_len_profile$Counts)*100 ,2),"%), from",sum(filter_len_profile$Counts),"to",sum(filter_len_asv_filtered$Counts)," ASVs.") cat("\n\nLength filter results can be found in folder [asv_length_filter](../asv_length_filter).") ``` diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 817eb11c..f9b34c6f 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -60,39 +60,34 @@ process SUMMARY_REPORT { task.ext.when == null || task.ext.when script: - - - - def picrust = picrust_pathways ? "--picrust_pathways $picrust_pathways" : "" - // make named R list (comma separated) // all non-boolean or non-numeric values must be encumbered by single quotes (')! // all elements must have a value, i.e. booleans also need to be set to TRUE def params_list_named = [ meta.single_end ? "flag_single_end=TRUE" : "", - params.skip_fastqc || params.skip_multiqc ? "" : "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'", - params.skip_cutadapt ? "" : + mqc_plots ? "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'" : "", + ca_summary ? params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,ca_sum_path='$ca_summary'" : - "ca_sum_path='$ca_summary'", + "ca_sum_path='$ca_summary'" : "", find_truncation_values ? 
"trunc_qmin=$params.trunc_qmin,trunc_rmin=$params.trunc_rmin" : "", "trunclenf='$params.trunclenf'", "trunclenr='$params.trunclenr'", "max_ee=$params.max_ee", - params.skip_dada_quality ? "" : - meta.single_end ? "dada_qc_f_path='$dada_qual_stats',dada_pp_qc_f_path='$dada_pp_qual_stats'" : - "dada_qc_f_path='FW_qual_stats.svg',dada_qc_r_path='RV_qual_stats.svg',dada_pp_qc_f_path='FW_preprocessed_qual_stats.svg',dada_pp_qc_r_path='RV_preprocessed_qual_stats.svg'", - "dada_filtntrim_args='$dada_filtntrim_args'", + dada_qual_stats && meta.single_end ? "dada_qc_f_path='$dada_qual_stats',dada_pp_qc_f_path='$dada_pp_qual_stats'" : + dada_qual_stats ? "dada_qc_f_path='FW_qual_stats.svg',dada_qc_r_path='RV_qual_stats.svg',dada_pp_qc_f_path='FW_preprocessed_qual_stats.svg',dada_pp_qc_r_path='RV_preprocessed_qual_stats.svg'" : "", + dada_filtntrim_args ? "dada_filtntrim_args='$dada_filtntrim_args'" : "", "dada_sample_inference='$params.sample_inference'", - meta.run.size() == 1 && meta.single_end ? + dada_err_svgs && meta.run.size() == 1 && meta.single_end ? "dada_err_path='$dada_err_svgs',dada_err_run='"+meta.run+"'" : - "dada_err_path='"+dada_err_svgs.join(',')+"',dada_err_run='"+meta.run.join(',')+"'", - "asv_table_path='$dada_asv_table'", - "path_asv_fa='$dada_asv_fa'", - "path_dada2_tab='$dada_tab'", - "dada_stats_path='$dada_stats'", + dada_err_svgs ? "dada_err_path='"+dada_err_svgs.join(',')+"',dada_err_run='"+meta.run.join(',')+"'" : "", + dada_asv_table ? "asv_table_path='$dada_asv_table'" : "", + dada_asv_fa ? "path_asv_fa='$dada_asv_fa'": "", + dada_tab ? "path_dada2_tab='$dada_tab'" : "", + dada_stats ? "dada_stats_path='$dada_stats'" : "", params.skip_barrnap ? "" : "path_barrnap_sum='$barrnap_summary'", filter_ssu_stats ? "filter_ssu_stats='$filter_ssu_stats',filter_ssu_asv='$filter_ssu_asv',filter_ssu='$params.filter_ssu'" : "", - filter_len_asv_stats ? 
"filter_len_asv='$filter_len_asv_stats',filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "", + filter_len_asv_stats ? "filter_len_asv='$filter_len_asv_stats'" : "", + filter_len_asv_len_orig ? "filter_len_asv_len_orig='$filter_len_asv_len_orig'" : "", params.min_len_asv ? "min_len_asv=$params.min_len_asv" : "min_len_asv=0", params.max_len_asv ? "max_len_asv=$params.max_len_asv" : "max_len_asv=0", filter_codons_stats ? "filter_codons='$filter_codons_stats',stop_codons='$params.stop_codons'" : "", diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index cb3740c0..12a91173 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -675,12 +675,12 @@ workflow AMPLISEQ { SUMMARY_REPORT ( Channel.fromPath("${baseDir}/assets/report_template.Rmd"), Channel.fromPath("${baseDir}/assets/report_styles.css"), - !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") - !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect() : [], + !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") + !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, - DADA2_PREPROCESSING.out.args.first(), - !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg : [], - !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg_preprocessed : [], + DADA2_PREPROCESSING.out.args.first().ifEmpty( [] ), + !params.skip_dada_quality ? DADA2_PREPROCESSING.out.qc_svg.ifEmpty( [] ) : [], + !params.skip_dada_quality ? 
DADA2_PREPROCESSING.out.qc_svg_preprocessed.ifEmpty( [] ) : [], DADA2_ERR.out.svg .map { meta_old, svgs -> @@ -694,11 +694,11 @@ workflow AMPLISEQ { meta.single_end = meta_old.single_end meta.run = runs.flatten() [ meta, svgs.flatten() ] - }, - DADA2_MERGE.out.asv, - DADA2_MERGE.out.fasta, - DADA2_MERGE.out.dada2asv, - DADA2_MERGE.out.dada2stats, + }.ifEmpty( [[],[]] ), + DADA2_MERGE.out.asv.ifEmpty( [] ), + ch_unfiltered_fasta.ifEmpty( [] ), // this is identical to DADA2_MERGE.out.fasta if !is_fasta_input + DADA2_MERGE.out.dada2asv.ifEmpty( [] ), + DADA2_MERGE.out.dada2stats.ifEmpty( [] ), !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], params.filter_ssu ? FILTER_SSU.out.stats : [], params.filter_ssu ? FILTER_SSU.out.asv : [], From 055160157451ea607691d58dc6814804e1441e8c Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 11 Jul 2023 14:45:35 +0200 Subject: [PATCH 081/230] change to nf-core style css --- assets/nf-core_style.css | 70 +++++++++++++++++++++++++++++++++ assets/report_styles.css | 29 -------------- modules/local/summary_report.nf | 3 ++ workflows/ampliseq.nf | 2 +- 4 files changed, 74 insertions(+), 30 deletions(-) create mode 100644 assets/nf-core_style.css delete mode 100644 assets/report_styles.css diff --git a/assets/nf-core_style.css b/assets/nf-core_style.css new file mode 100644 index 00000000..0195a723 --- /dev/null +++ b/assets/nf-core_style.css @@ -0,0 +1,70 @@ +body { + font-family: Calibri, helvetica, sans-serif; +} + +h1 { + color: rgb(36, 176, 100); + font-size: 200%; +} + +h2 { + color: rgb(36, 176, 100); + font-size: 150%; +} + +h3 { + font-size: 100%; + font-weight: bold; +} + +h3.subtitle { + font-size: 120%; + color: rgb(0, 0, 0); + font-weight: bold; +} + +h4 { + font-size: 100%; + font-weight: bold; + font-style: italic; +} + +.watermark { + opacity: 0.1; + position: fixed; + top: 50%; + left: 50%; + font-size: 500%; + color: #24b064; +} + +.list-group-item.active { + z-index: 2; + color: #fff; + background-color: #24b064; + 
border-color: #24b064; +} +.list-group-item.active:hover { + z-index: 2; + color: #fff; + background-color: #24b064; + border-color: #24b064; +} + +#TOC { + background-size: contain; + padding-top: 60px !important; + background-repeat: no-repeat; +} + +.nav-pills > li.active > a, +.nav-pills > li.active > a:hover, +.nav-pills > li.active > a:focus { + color: #fff; + background-color: #24b064; +} + +a { + color: #24b064; + text-decoration: none; +} diff --git a/assets/report_styles.css b/assets/report_styles.css deleted file mode 100644 index 4e1988dc..00000000 --- a/assets/report_styles.css +++ /dev/null @@ -1,29 +0,0 @@ -body { - font-family: Calibri, helvetica, sans-serif; - background: none transparent; -} - -h1 { - color: rgb(3, 101, 192); - font-size: 127%; -} - -.title { - margin-right: 200px; -} - -h2 { - color: rgb(3, 101, 192); - font-size: 121%; -} - -h3 { - font-size: 109%; - font-weight: bold; -} - -h4 { - font-size: 100%; - font-weight: bold; - font-style: italic; -} diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index f9b34c6f..4c86b6fc 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -115,6 +115,9 @@ process SUMMARY_REPORT { #!/usr/bin/env Rscript library(rmarkdown) + # Rename .css file to be piced up by Rmd file + file.copy("./${report_styles}", "./report_styles.css", overwrite = TRUE) + # Work around https://github.com/rstudio/rmarkdown/issues/1508 # If the symbolic link is not replaced by a physical file # output- and temporary files will be written to the original directory. 
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 12a91173..f7f2fa5f 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -674,7 +674,7 @@ workflow AMPLISEQ { if (!params.skip_summary_report) { SUMMARY_REPORT ( Channel.fromPath("${baseDir}/assets/report_template.Rmd"), - Channel.fromPath("${baseDir}/assets/report_styles.css"), + Channel.fromPath("${baseDir}/assets/nf-core_style.css"), !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, From eac9b3523fdb150ba8e7012b574100422077a338 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 11 Jul 2023 16:14:59 +0200 Subject: [PATCH 082/230] add version and figure --- assets/nf-core-ampliseq_logo_light_long.png | Bin 0 -> 12840 bytes assets/report_template.Rmd | 57 ++++++++++++++++++-- modules/local/summary_report.nf | 8 +-- workflows/ampliseq.nf | 1 + 4 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 assets/nf-core-ampliseq_logo_light_long.png diff --git a/assets/nf-core-ampliseq_logo_light_long.png b/assets/nf-core-ampliseq_logo_light_long.png new file mode 100644 index 0000000000000000000000000000000000000000..8aac12e2760828636be8e01418cd4b065ef0e3b3 GIT binary patch literal 12840 zcmZ{KWmp?Q*LH9z?hd6b4n>L;D^}dy9a`Mo-Ju1FTPc!4aDs>61zM!I2P+N%3c-2P z=Xu|M-*;tqv)AnGoSidgX6|#&ouH|%fR9Uq3jhG{l@#T)0RYr4p?Ch}VCD$E1` zV6g}48hUG6`7ybBy4inpwqx=RaJOT!^Z#fM0QfI~vRxXt+VJI{oXCw(QAp6+=^`2R z7Nx%w%Pya)A-q|0fr)xx+eB39m855Ti%++^0gt^3#+({_ZY}e9*O}%W1U$xpVYi0{ z+5ry`NxqEG_hB?)KP3s-WplkkpDxcN$1GPK{b5e{3B|+3SM6oTHhW%MW0nw33&y!L zb^h8@4|Yb`ke=6$ZjaLxYqW|N0%tp;yR?jD`%U?GPd6SVB3i~nGDbcQBm0-jYYf|W zzXIHSLtj0+cu70|s&&_AYm5~2eEJG9&i+7EQXF~jb9pa%>lw-zOkYfWqtQ@POyzO5 zOl#dk_{cdR#Z}NF@w?cqDkS$-@W*bA?|A5`Yo@Z?VVqW-rR8YJ=TDMOdmP>6%Ec%? 
zSLF%TJC!*%i|rQIFpzM0!#PRCcS}|OZ7GI{u+AI5)ZKyr*_XmO?_RrwU!Kzl^LQe% zjdt>Bgon4|w$Q?EBMddk(T~ah3}GrHwkGi2M48;gc-a?ugjS}DLnJY=iJfT2-#sz| z`$B=AO=sofZO}?0cJq=jvEy@j3T8fg%?`Kho8>#0ntRPQAJD(g2lRQqYNf%(hjI+U z(y}q*V(mEX^?!jR=3Hh>Ra%Uz=Q%z5n<$0rPS(2aKD1}qgM{2bmqHy^$(Y7ZW&zJht9dM3E(O8jwB-wS%eBjX z%OPoK^Xig23&3Fwv4>>08Of?g&wKot?WjCRddI~*t>Ak`#A{N|odF(urV9CYE3Z+B zj9=RWmDmG6b>@_Z>W!1J9$Q~bV^+hRw2>0s`~UzgNh)10ZOl*$JR@4d$C%DK1x5_l3a5hk_FThdN* zgT%qv@js$zc*6PnCgUfRAYz8YKazIhUT=y7nt+Zj!neG^twE$?oYpUdpZ^@qa>2Vc z)VCA<&E7tZGhisH;8IN^w|w729QuWlUPZNgKIz>u(Ki;enbaX2TJ5x=*Ld6SidB4c zozy}WxVK$I#FlS>Ay*rQ3F_jk$AF)!AM`(UIi}cHHdXNReF<6pBJOWS8Sjr=@LI{&z{^Ir(PmFy z&5OnMQvsju%YOZiyp_S7%>Lc-X<3k}-`+y|$Fn`6>Jka-PK?d4eYq0Pu6RMPor9?F zTV@}rHM^MfjtC6wFEkSC&L4Kf;c>5;lg$_ng?|ru`BVAoET)JQ$#$aROi+u>5GU4; zjWv&Jx!gChCHy`sTQGbq;XMpz$$K~PfHIE++O|4(j2ao%^)<(Kze$^UGG1+Du&ZkZ(EjhFL;{g^I z`q=4PU-SF7Ijxu`P6`SqzQcAN*Hg$9Gp{3g$so$R-!Y0NDRY4RU5%!M1SlFUP#%<2 zqLZ$SprAKUUXGW-r}m#*HE_g6Q4mkc^#9uOpqx8B_ch<7D$MFZXE-E8@g?S+aoPE3 z69N9q`m!SL$kgjq%Cv-n+T{-@k~C&&Z*Nah3azuh6(oJd2;Z{|Sp6Nvc2Ej#=tVfc zAu5CBFb*j1v$ioHQta;3(9nsqF7M>xbo^E&>LnDFAoibX_xwI&eA7?NlDw<1W?lHe zorfmih}wf2OoltE-N*3jk`9a6V%)nOgD_R0TG_HnOL0Iz`x3)}(ntL{X{RY!VgB~- ztI}*bF9c7f%2JDoG{O*tJDFk`_68?oG}Gai^ah9b&y??77g|>mN$j*GSLxv;HcgtO z@Sqh}7*zm7_~?16Tup4t!wjJIJ2^_8di!cuDNfe{jV+MgMB%qFt$gotN}H3YboNGl zWvQNT>)S&w4syCn8*Loro0mxxbqNzaC>U6vZAoi$R{#Jd`lGC@rjo4e|81Jct+F8G zlZ0abJGwy&o${~T1inNbKQzURh{%0rILJz6faJzLyzPIV<-K{y&XXGH)Z3dj_&qAA zsg|HNlIa>{^I-Ga9`mcm*X+&lXRtQL(-Vo4H58e$KHVlKJWatdMb=Ggp;it|bK-Z5 zGV#{N))kW3%C9y=AtxJKhi1tR0mXM-(V1N%fMP}QG4FonHKwAaX7BO40W1B_WSd@_ zT2VZT_dk3VcZA2~<1mV-h4}XaHhCKik1X4xj7BsQSQB0(e%FGe`xtF>4H|)F3=fO$ zV|u&5jX!Hdk#oEpCVa%| z=@fiMy5*|POW@tJa5aogw1AQ4v-~ItKK_FP3&X5!2KOuh>?M`zT3db)Pxj!OnzjlC z5VozLU>19iq|=RP=&+H6BLr?M=y=_q{jF{9Zn^Tf1=v{>tgDT~B1nAR!^a)76Qzmo3C3}gw8yP}a706;MC??lPsB%nnWVtFg6 z$z$zeJ;Uck_f+WQ2LPA=N^;V={!7P&L7Ap{g{M+_A-+4h9AGt785T=5o}nC^=l$}@ z@$sLsOS*6|=mK!wyhvt_N)AkTX7p2pX^WH&b4rFtn%edQx*U2eEwvmbvpw$s)iP0% 
zD(lO3mk%ACiECL0cf)J=Y8nQ!YnOX(XYVDQW=A6>eH@EJuC_YE?pnj7h|s0~pGQ0$ z9}xAwk9y;>?syW0ts6uWwt3`z6dR+y&Z+tZH=;tpdTo12jb}m;GrKV-Rg-`EP@&WS!69 zCQSHrt+$K#h}MgXft{@X?GLF4h9S22+=;cp|Fz0X?XVc;RzN`bBGTZ$z44DhswEe{ zW?*10#Q&{$2Awe}`t#r4K0k+QL4WdR;}PsUBUDktiDY_{9L4v3O0Bwhb&~bp%cVD5 zSfR={^guUWH7+7#--tH=cc&Hm=kGRuGhxfjwAc3;3ieg=F=0p(MGwJ+?oPqTXVx1o zO#4MABK5-cWQG9mrMC_3i~vWPLauwv`&W7_Lo-_6IJV*p?Mlg0mqM}(}G6IsVR zR;k@DXZO33sS5gNw~to=#)mHFxX~1QIk~>UW`Lhqf2^1{zS&M5qD<&mMX z;5!ATm{Cq$usL@FSrHyE^uM_+NXozYIx z^GKL~ghCrq=f^V-NGcRzm{93=m?C6`GSq$zy%=uHPq`AxBfI8Y(PF+SRU5szZpL?U z*H9A*JVHo-Yq`+!$K}Tcs2*|zoFb~da-CoL%FDrTh4adx!tzJBGCHDEhFTk^^MuHb zbs-H_)2L~H4=e07#+UX`x7NgAEXq@20-m)O$%oc_v-48tvA>AZwyQA_>wq4cNrutAuE??@2t=d!i0|UX_sugTmTL)<-w)zcZsco4o5V91>bO?g<7S=F#9i8ogGEp>1Q!vw=@umH1w^XQjJm$oN6I0qRZR} z`<_JPs}^EKLKfq7`L53#d{z{y@mpfm4EbUl>6steBF0~oDD99)*Z@XXAgVv}D^*)E zsqx>!G{dxqlmOapGkSM4(?aJ{2V^y?c(`_ww844X&T1lb$u0 zbISirKXZp;MfOBfV9d6% zYc(lmz7T_9e)LN<)3p~_R_jJBX#=MmJ?Qd%rwC~@kfQUsyYfJiD3(Y%h3yC#qq$$O zI_1gd0}^yMju>WIfa@0y5_D;{u~O)f5ZkUa=z0oV4`0%SJg=*O=0_#OxGxOL^_8iohP-|7Lfj7K`h%8oD^af7Hy4D%<_H?}q$57+q(1DyNw%u6yc-Um zGybjAIuUf#c*+NOf0Xm$v_7D#Rc*Ar7<uj6|RaL6~Rd~ z@eR(=7Ad{z$Kpk4pBL0h#>z>y8U}E>1QqSTXODrl{!%P4 zms^rMO+5iza09Tbslk0WvXs?*S`)%MYtl%m6*>&WHZ|yvUdEuaJPfuFtL3VH)^rxx z`YrQdm`=znF2U?3elBp-`eCV&ry=AJ1eM$xUHUj?(zvx;>ygxIftH5ay(1S*&YD?HzmJl%M1zyTbLKjbxJw*NCAN@ysd6Mn6C{;@6XJ_brK^02YGX6f zZ+!ST-%ypjEdLlmsLf>B; zwK-YI!`1`EG(Ra%AE#9kWIi_MG644Vb=AiRK}HdsP-8!W;hb$mg~tiS1QLU+WR2OX zq}ttb!1q}s&H?ofoOIwrGFK0nKTII!I6iQF0uvTQHMZ)T<{6zsT2v`rDBx4@)A$@nhh&#Ff&c!s5u<17Rz1pKxl*+eUU$shCeVyc@5wrid8laWqC@n@P9n7( zC0T;kfxe%%-!c#yNAL5?qz)_0F2ok2)Pf&sN@JNitKP1GTeDJ!Vm(7@ zDBDQeq@4rn5gnu zK>6;c<{QOp>A=KRQ1O!ALPP33hHX0faC;F?H)`YegC#N_a`X43Aq*Y z$S%skA^PcDD6aL)wNnMcnXhf>$2+a zn0}@XVDZlVw69;|fvNYNu1E_qppCa+H9C?r-Yp}7bu(^HC7?0JhulRwj?J)#9Qw-d zT7b2^rW|eMU(L^!G_=AJ?<7^3!^42`*Tz#1YMoGL^fXk5k04>;4_X0<$E`^;D|5Ke z9zeDe=tYJm$LcgaAYj_Ihcnk7sKPbwO3wrdK|0C&r$Izd 
ztxb#GvthKSqLcm5_2}z1nSkf0EIbF7I`_3KoWp1OlY-TB9xxkd1ak7PXhP~2%Yt!W zZhUj6qTE%*XuL)~e}u2+E+T?-M&QY%!5iZ&SrQQ=HTtQQ9%8@lrdhx#F;pt2AC-YR zEpuWLENkjwpG+sC6a4tU{4NN;hy(QKD$|#>gf?QLoU_BmW?Tn-_j_1T<2cI4Cym$F zS^Aw<$_OZ}R{qkEw}nNJcd??Ko(C6Wp*_vP_;xj6?Bz2aEyjn#IA{61C*;4F2B-#7 zHI+jq{UT=N{-jNNqe0PpwVLp3RvGS=vE4W7Qad-6!>$he7}utPb}pe4^_@{(x9Q*Y{d*@D)u&J+#+Gd;%#yZFS?Mt9B=Dh>y;EDeiG^v&oS*?<1 zTbmaM=+mfOVs&=0#^FhKMpg;kWuL@M&ixTwK>O`dBS^Ai*-@mqa7ai~3mq9uT-WNnKYLjtz zLC8J0U0wif+W^-aWNKe*xKJNwZwETm8$nh1qH~+mN9)BnD_@jj3-NiRpo8Q*`)DWe zhCK_6O(}PQc9JYEz-oY!4Ogd*Xev51ML$Okt6>@Ao~kFR{TIiWcCuY-F0-2u{KZ7f zP7cQX^NOo?GyKUr{J4M7?soVW$5=aDksme8w~MisJ!U&rgq7R*VRk9O=H%uAfo+dP zFCX6jRww4P+pdmDLRL+P(L%^?>bnysIdAqzE#{i!o42>|*lWNolBp*0m>;-5rL*+2 z(M(cBxvy_zBQ)OM>xC5F5>(!#4_)^QekRvV9ro|ngb>MFAf~glvAR;@;-`-jf+@$# zucD$Z(*drH)(xvS3O`GS^pjR6X&5WMs#D%23xl1XlrMZcI6NSAB5s)Ns^HhzBd*`! z2yWzRmeLP`T+lcvh%*!jv{w7yGq|yt7On~KySea%@U-kuI*hJ^(^Kd(9ueCnyTj(EYIEo*rM2yB_@q^ zE+l7O{-El7-P8XBg;S`0OB;5{4UYL%J^5rJX3-p}p66B4sV8o~zRX#0TOrH>sYk5H ziR=4zL4r@d>W#;}HhW<&nCGh~u2sLe;{j{+jb!L`267Dz+>4D~7?I>Gr1@pt+`Yao zX0=50K6sxUXHJ0iEeq0A(Ql|)ZW3Gu3K}1>=q<6F>BlkG>{%h?*Df(;d+lsNP-EGm zoSf!f+<8Ki-d)GSJcq+ATKwYQKcyt?`rM4@uCkYlnp8U3cUs z9F15XRv>{f40q-0=bz0or@t|=uP}5aS3`o(WWTyJWjxud2LJjT@&$v6GDcpI>oHAV z8ADCckdNZ=L$}L|SIco$Wzt73+#YY?jU`)fu!@U zBKRM5F~`_h4L2SNRvb$l+dL1mv?m3#-Un z8vb%Yrx6htXNFyy?{9H;KX&ZKc<3?lJ`QsQvj9t6bHfZHCo0NfSb{{pk~{@pqgzzI zYrgD(uB^pe?2PcL@N{4xy?dJBsAFa>sTy-{XfJA;Q5nZB*gawehf|AVxPxwyTTK#1fVs46iup~nmrzYD7<=b{CdoLA3O*_H~o``I-gi8<|0%V;Je9_J+e>q-NBu9E7XJ_oBv4l zRX-j}P->FNn5QaPgc`9Ui-QPm5%W-^m}$AX;;VkXYL%7*8?NfvF}Y}Ve~wj16z4$#K_(s?N*Ov9-3GFejh)Tn}a-wyy&^Qp^Obf^v!Lo zch?I(-5s062_3Jz%{*Lx+qR=&5(kE~3HF3o9{w2b1(i$TyWwM=o6Js*nQUQn3raRF z@3>FnFznn@xC5&f3fbyo&tr~}crP3I?|H_*zUwi@8O&AhTYc}CCP|Vzy>`V^?3DSb z6O`ix%;58TMwtq{;FFZu(NvtDNk4PthO2`vKh|3NeNH$rnZx~~Tz%i!6Jg?Pz3XWl z;h7K@PWbW2s=1Ki^dlgn{)gb+Q?HPftcex(k{QEke`|N$v~k!Es})ACAP+OsEV|In z_&c6)KJtSPC?}B4D5ytm0=KhJ3zmOKBaU#->93p`AhW*-6ipTGSM!ef>jV+pk>SQb 
zs;n$w!{YHdUt@qnx{26`_wd_)h1pT$9|LRAPCy#Drn@`Cu(Mqq42xEKPkXbEovR7bxR*?yM8sTH_y^JgG!*_-~f+M#$Y97YwPM3W)sIjcQbnr54T-v# z{lzlLI}-@36T+wDpeJ$w9Qom7K;-vZ+uUK)y)_moA5_GV)WqDG*kRR)ET#_ak1j1z z1U8&-4x_JnP_#K}k?Ls|LNglXkp&6zdciSLv?2-RDOgMPu);kWXhmxhs3%w`Y2iuE zkB-aI#xN!on~Qs7S#2#Gny5VfcEk){`&}UbwO_Gu@v|cMYgU`tL7S0j?}Nsy+mwGonY2 z)P0#i!C-A#6{l+N_gYd!J#V+q{rD+eT|N>#Bw)(1d_b>_<0ge#2m|pvmd(3Z(slVw zRd+Irh3A{REJGMHdJC?ksD!hlkZ$&D&XJbi@-(bg|GU1izlw|RdTdz>4_mp=TOMOX zg+u_!lK{F|4(SymT5OiKhBd*>ssp7ziT0|thkur8Ix}r`< zmhHFn-ddO;3L0;E#OfO38pr>|jO6{%7ZQ}S=mvn;W**p+s1=kN2v3+b(J(Rv8KFFS zQ0L3*l2pum{_~fVoMKV_Kyk4;^DWV}pnZPOtCPnTvz(}}>sUpttn-6JlakIJX3gOJ ziwBd!_2Q3r{j)i%_3KCoUY(h@yHGc@8QGfG&B|^ zwyF*AG#CuJ{(Pj>@a|&v9UgG`*tPEZ70r6Q=N5DL5EnmD$pokPcso!(NAl4_OLSX= zlX(Z8sF{ug#=5D8{_C0$O`SdA@|m-owohX;aMh>bEwgMQ<6pHxhr;F;b#sCEa7DB* zFCckOkAZT?_dcuoyxRcMT$3T9jazd+WrMS#D6_Q?6Kbr1&%_c8Y$yKv^n=H*Us(gG z-yb4vma2h|(d^DGu4Qw`qTz@`SpZcrr-wA{#!$vo-abb;_KYRTyg0^CBDXe?VJBg9 zla~Wh4k6tT)#Jhxe3hrEOb@tE-!p{800(FLX2!i%F>ruir!$? zui69-Nq)#U8kJwHr+5mE;VGvt8cd9vtkZfUOSEP`DF_)1%%}PmH`HnGZKJIgCb{X- zwI*4VUb`9Ng!)~gQqO_ZT)1nG5B?@7e@sDje#_#|?W%S(7rLb6SUF2*@KCA^kna2> zkH!yAGCXK9fVgWqW1JH9>o^^f>@8M(J%pP^06lzm%dpLe9nframqlyckm~30sUcdUB}nt3u7 zEoRQdYH?nH--uQh5cXWO`92|C?<DX{r*_MBc0#61k{iaimeMLdTTS1y z9AAN}0TxCob9K7>1!;G_*BwG38oP7p{$Hm|4Xp~OF(GDKA)W3T>gV{N%VVB1g=0Zx z+vO;E1{av$^nP#4+$+!}4d`;+2}FqX_!D>iHOisWhkmonb#9YW|1{Z5?Pry396~{v zY6Rep1|r15*u}>ee#FFGeK|i?Fr5E>*e-VT{8Z%8<;vdTB3&GLM=-2I9H{5yfv6V4 z6Zxu#(UO#?!(r-))l!LidhQ=4p%y4dJOtVyOLHnN8uzN73p8|ksG7UgwvKx0+P_OLEj=uF-Ug{q;~5wV|Kv|T0Y@{89LF{-@kmq$iQ zG;^h%EYgJTsTox&QgDJ2;;I5(%A9_v97ZR@az%o?j)!^e5nijqUvWJZ=lcgv2kzu& z)SbRc^F`)tY{B}R2fK0`}c^)LF94T zVO(3;NBI;ux=}R$`eQa0X)kB;uFwid9|yLE{Gj#NLQit_GhO}_sVqFVKHEq3-Su`F zKb1fGwbgzmbF}VK1NA6=V}8>OsAmcc7D{MR8n61vAt(4iTSX3P9`#rK)!|YwD{kbC z9JzF3Ae)7uiGm(ppy_fAiV;^Ah&Tep{l$q97UPU@TJ!#vIVkh?llG{XV585(+2ShO zuFBI(SRNS*!M!HCPhiTa5Hi?Pm)Swtwz9#Q24etPF`9ri1E;nZ@SGr>k>k4`kWMZ) zJ|QwgN0sI#K9dW*goc%W-dIPT=OY7d9MrK$)w**~KQh7$a>(i#%Vfx6kv<5ZZP_6m 
z!~!{{xJwI9^B3Pz(@8fP&o+UN(KQoYW5BzSnrQw!anXh`!f{s{nlvK$M{FbXTO!j< zV4=@sT2MSV38d(m>%R5FYE3^~OSznWZGstAdF`UHKKIAx!U~35A&`Mnh=>2v&#fg> z>J8I@*S9j*!4~Y7I^{FOvs^98K=tyODPR?qnD21?uAU=TGKvZKZJ2WIBxQFupv{-2aMs6!btDrPHPxG*5jQZdK@i(PzA zH|8T5w}cvI5qCE2Wt8)(<{BcBg?EY`+FxDjOkqvnT@Y=NI28)(Bo5Yy0@_}y!Eh%a zp%o8!$P|!3PR60l*9KA>d~@ZH%I=294sEu^lb`~2=4(IQhUgdP@2w#DX!Ygw*I;5ASF|}pK4bM} zZj^Dqt|uN26OSGY(fa8GA-w7I$8;S8_FZZ>>pEcA>z&r->2ze z+zjn&qDey`T<#sNyZ|{)o?1v`zLpaV-IYSA)LM>+P=}?nd^slKP=L0%vRSYR!TcD! z$+|4L#yPXJ*IS--xcKtZj!_q5uBEI?S4mUz&?uvNO`g|du3aQ5f8|BJ#{EtM5U2tG zu($tl0eD~;S50c{m*^~>V~hc_1|bA+_0D*AGg#f6F(n5VuOE9kPk&p~4gW;yhJqlF z;1v9IcG%~m^WkIs$E9pY?#VucJ$vLWE5{;oos3@$xU?%qy6oe{$jB5}Y2d_K^#IXu zW2w|pDONOQYhulX%&m7}I_-4hd1+sSV`uI#|D3zkjmtDb3x3ge3Pb{gT@*s%(&k)( z1eRMq9_kbr3#^G@LTa5SmZpJ;gXsCXYw1zs&D_WP;EM>-<4W^dzc!Yn1^@$nx$>lO z8e2IJ%8C5Sn_&@b5rNg|5K(s*o(!KetEVm(9Wv#z!LTro`s>Lg*#gPk6grXWO_X$W z7N2{&t!US9C6f@rx>Y_?OG+y=P!-unx;|TLM=GzrR$)}T#qflkw^`0;>PSrfp82te zp#@DX6Fi6Ly{&s^VVqXJSweEh zC`f0MR2tDl@jHf(B!v1}t)sG2Y<1d9$ovEeuF^)cm5W2YE!*HRyRDR|Yj6cfy`91} z<7@mlO7T{~``=-wTz@J34Dtx2I?E8FTc8CAvTMT5_`E#^;V+Agui|42*jOuWk?AkR zpD2+#+&trcd~p@EIg#s21a!#sfU{$UK~_uD=q&O6L~%*iU<-C|Q*RG~Iyvn`H+G1c z!R;3jvjWOBC&%lE*nAnXRMTjdd+a?CDmIWuyU>+Y~jv?foX>RsPTwyQ|VnyLmET2_#F+8#7&D!ZOxlNACW*q`Jrm6!ebI2tqSx zC=-W8Kh~=Wfxpg)l1~%9=8NNF7hRuKxAv1+U5Oc~_on>i;XyUvOlq*H%Hd`(oAKf6 zZ@E_L45I_AH}l;#_;3c$r5QiJoIbSM#3^L`O<+aw^mYHIn1-|Q4vg8~yS}K5=#r@x z@p8pC8!I;LIdp-MICxwIIFY1%r`}F zg|u;=7RZ>E(}bi?6UKEqXqoYiC6o~w?fn+b{{op3=~#&oA~Skr;j~UQ0U=u`C?=w}n$PGM6M z)WrKJiy`*GxvB7UK1XNT=|FAUvL}qtJxz_pgKVg;Rf^^1QkT-wyWYg(6e^X~9yEMP$9y!5*+0vGTm7CFV4_{yX8gf3w0Ec8AIShc)3c)GPeFGk$B!(vJrWt5Y&V#09 zi>O__XU4_nQkx#@8<>(46D@O$$oyop%l<0(*GYROhN&ay6OA z1sweJE%P>yds@qPs+)|I+2v1LBl&Q5Kezr%DW~BY!8y;BD$h&QU5;|eC=U;j;1xPP zr-DcJrSt8hzIzAggYlts`OIZAtHwsQ<;U;Wt@@Ats(8<2UQ}g*eL14XqxsQ&-#t5K z5_6U+2Xd%HuTNZeurEYF3@3sb5ux$teB5{8tw>|z1_kG9H`l;Y{JZABa_pC6?j`-@ zl^;Q2yd#LH5%jvQYJ&q-*F#pi`3i;b4Yrw2$6gsYYckc$9~OiJ?Q^I(r{T6qES&yk 
zpZ!d$TOe#4$IPqK-7`EdzsfPf!KN(Bjn_M#AJjfzYY~0k8rA$ZBB|7CB1d+zGO^&N%BP% zDgOiRW$Y``Xo}W#FrJEj0y5lA0y9+Q+HF`kTIOpwu0tl`9taSMfC%zwmncJ|ZvK(i z%(0LH@Tm~dbu%7m#EvR>bJ_!O|4VDjnfuq#|5U3G{6|Pb64;W$FpAvUT+4KktfGHf zBZ>wieOPiJMrld_c5Cymr{{XF{>!QR|6k$8k5ag(hzEQCRSA*~2~d((m#dYrjQk&g C8zPVZ literal 0 HcmV?d00001 diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index b6f88cd6..c9431953 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -1,5 +1,4 @@ --- -title: Summary of nf-core/ampliseq results output: html_document: toc: true # table of contents @@ -8,11 +7,20 @@ output: theme: default number_sections: true # add section numbering to headers df_print: paged # tables are printed as an html table with support for pagination over rows and columns - css: ./report_styles.css highlight: pygments pdf_document: true +date: "`r Sys.Date()`" #bibliography: ./references.bibtex params: + # report style + css: NULL + logo: NULL + input_dir: "./" + + # pipeline versions + workflow_manifest_version: NULL + workflow_scriptid: NULL + # flags and arguments flag_retain_untrimmed: TRUE flag_ref_tax_user: FALSE @@ -75,19 +83,58 @@ params: diversity_indices_beta: FALSE diversity_indices_adonis: "" picrust_pathways: FALSE - --- -```{r setup, include=FALSE} + + +```{r libraries, include=FALSE} library("dplyr") library("ggplot2") library("knitr") library("DT") library("formattable") library("purrr") -knitr::opts_chunk$set(echo = FALSE) ``` + + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = FALSE) # echo is set in differentialabundance v1.2.0 to TRUE +``` + + + +```{r, echo=FALSE} +htmltools::includeCSS(params$css) +``` + +```{r results="asis", echo=FALSE} +cat(paste0(" + +")) +``` + +```{r} +if ( endsWith( params$workflow_manifest_version, "dev") ) { + ampliseq_version = paste0("version ", params$workflow_manifest_version, ", revision ", params$workflow_scriptid) +} else { + ampliseq_version = paste0("version ",params$workflow_manifest_version) 
+} +report_title <- "Summary of analysis results" +report_subtitle <- paste0('nf-core/ampliseq workflow ', ampliseq_version) +``` + +--- +title: "`r report_title`" +subtitle: `r report_subtitle` +--- + +# Abstract + The bioinformatics analysis pipeline [nfcore/ampliseq](https://nf-co.re/ampliseq) is used for amplicon sequencing, supporting denoising of any amplicon and supports a variety of taxonomic databases for taxonomic assignment of 16S, ITS, CO1 and 18S amplicons. diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 4c86b6fc..1f0c7e96 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -13,6 +13,7 @@ process SUMMARY_REPORT { input: path(report_template) path(report_styles) + path(report_logo) path(mqc_plots) path(ca_summary) val(find_truncation_values) @@ -64,6 +65,10 @@ process SUMMARY_REPORT { // all non-boolean or non-numeric values must be encumbered by single quotes (')! // all elements must have a value, i.e. booleans also need to be set to TRUE def params_list_named = [ + "css='$report_styles'", + "logo='$report_logo'", + "workflow_manifest_version='${workflow.manifest.version}'", + "workflow_scriptid='${workflow.scriptId.substring(0,10)}'", meta.single_end ? "flag_single_end=TRUE" : "", mqc_plots ? "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'" : "", ca_summary ? @@ -115,9 +120,6 @@ process SUMMARY_REPORT { #!/usr/bin/env Rscript library(rmarkdown) - # Rename .css file to be piced up by Rmd file - file.copy("./${report_styles}", "./report_styles.css", overwrite = TRUE) - # Work around https://github.com/rstudio/rmarkdown/issues/1508 # If the symbolic link is not replaced by a physical file # output- and temporary files will be written to the original directory. 
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index f7f2fa5f..9f894b0b 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -675,6 +675,7 @@ workflow AMPLISEQ { SUMMARY_REPORT ( Channel.fromPath("${baseDir}/assets/report_template.Rmd"), Channel.fromPath("${baseDir}/assets/nf-core_style.css"), + Channel.fromPath("${baseDir}/assets/nf-core-ampliseq_logo_light_long.png"), !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, From a1002f7e549c5ab9bdfa6e2f2a011795c17c00d2 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 26 Jul 2023 18:05:58 +0200 Subject: [PATCH 083/230] update params and docs --- CHANGELOG.md | 2 ++ docs/output.md | 15 +++++++++++++++ modules/local/summary_report.nf | 6 ++++++ nextflow.config | 7 ++++++- nextflow_schema.json | 28 +++++++++++++++++++++++++++- workflows/ampliseq.nf | 18 ++++++++++++++---- 6 files changed, 70 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6a26885..7d7a4c88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#558](https://github.com/nf-core/ampliseq/pull/558) - Html report of results + ### `Changed` ### `Fixed` diff --git a/docs/output.md b/docs/output.md index 2d479272..7ee31ef5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,6 +17,7 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [Input](#input) - Input files +- [Html summary](#html-summary) - Overview of pipeline output - [Preprocessing](#preprocessing) - [FastQC](#fastqc) - Read quality control - [Cutadapt](#cutadapt) - 
Primer trimming @@ -58,6 +59,20 @@ Samplesheet, ASV fasta, and metadata file are copied into the results folder.
    +### Html summary + +A summary report for most pipeline results in html format produced by [R Markdown](https://rmarkdown.rstudio.com/). The report gives a general overview of the analysis, includes many tables and visualizations, and links to interactive downstream analysis results, if available. + +
    +Output files + +- `summary_report/` + - `summary_report.html`: a standalone HTML file that can be viewed in your web browser. + - `*.svg*`: plots that were produced for (and are included in) the report. + - `versions.yml`: software versions used to produce this report. + +
    + ### Preprocessing #### FastQC diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 1f0c7e96..5ffe6d05 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -9,6 +9,12 @@ process SUMMARY_REPORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' : 'biocontainers/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' }" */ + /* this is from https://github.com/BioContainers/multi-package-containers/pull/2663 but doesnt work: /usr/local/bin/pandoc: error while loading shared libraries: libgmp.so.10: cannot open shared object file: No such file or directory + conda "conda-forge::r-base=4.2.3 conda-forge::r-rmarkdown=2.22 conda-forge::r-tidyverse=2.0.0 conda-forge::r-knitr=1.43 conda-forge::r-dt=0.28 conda-forge::r-dtplyr=1.3.1 conda-forge::r-formattable=0.2.1 conda-forge::r-purrr=1.0.1 conda-forge::r-vegan=2.6_4 conda-forge::r-optparse=1.7.3 conda-forge::r-ggplot2=3.4.2 conda-forge::r-dplyr=1.1.2 conda-forge::r-data.table=1.14.8 conda-forge::pandoc=2.19.2 conda-forge::r-patchwork=1.1.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-6726188ca70388ddb300400dc7fe71101a4f89f2:0346b3395cd017327aff8dae37aad0a027a7613c-0' : + 'biocontainers/mulled-v2-6726188ca70388ddb300400dc7fe71101a4f89f2:0346b3395cd017327aff8dae37aad0a027a7613c-0' }" + */ input: path(report_template) diff --git a/nextflow.config b/nextflow.config index 7e43914f..0c8f9353 100644 --- a/nextflow.config +++ b/nextflow.config @@ -70,6 +70,11 @@ params { diversity_rarefaction_depth = 500 ancom_sample_min_count = 1 + // Report options + report_template = null + report_css = null + report_logo = null + // Skipping options skip_cutadapt = false skip_dada_quality = false @@ -86,7 +91,7 @@ params { skip_diversity_indices = false skip_ancom = false skip_multiqc = false - skip_summary_report = false + skip_report = false // Database options dada_ref_taxonomy = "silva=138" diff --git a/nextflow_schema.json b/nextflow_schema.json index 75b4def4..715052e6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -496,6 +496,29 @@ } } }, + "pipeline_report": { + "title": "Pipeline report", + "type": "object", + "description": "", + "default": "", + "properties": { + "report_template": { + "type": "string", + "default": null, + "description": "Path to Markdown file (Rmd)" + }, + "report_css": { + "type": "string", + "default": null, + "description": "Path to style file (css)" + }, + "report_logo": { + "type": "string", + "default": null, + "description": "Path to logo file (png)" + } + } + }, "skipping_specific_steps": { "title": "Skipping specific steps", "type": "object", @@ -558,7 +581,7 @@ "type": "boolean", "description": "Skip MultiQC reporting" }, - "skip_summary_report": { + "skip_report": { "type": "boolean", "description": "Skip Markdown summary report" } @@ -790,6 +813,9 @@ { "$ref": "#/definitions/downstream_analysis" }, + { + "$ref": "#/definitions/pipeline_report" + }, { "$ref": "#/definitions/skipping_specific_steps" }, diff --git a/workflows/ampliseq.nf 
b/workflows/ampliseq.nf index 9f894b0b..41da11e2 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -73,6 +73,16 @@ if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { val_sintax_ref_taxonomy = "none" } +// report sources +ch_report_template = params.report_template ? + Channel.fromPath("${params.report_template}", checkIfExists: true) : + Channel.fromPath("${baseDir}/assets/report_template.Rmd") +ch_report_css = params.report_css ? + Channel.fromPath("${params.report_css}", checkIfExists: true) : + Channel.fromPath("${baseDir}/assets/nf-core_style.css") +ch_report_logo = params.report_logo ? + Channel.fromPath("${params.report_logo}", checkIfExists: true) : + Channel.fromPath("${baseDir}/assets/nf-core-ampliseq_logo_light_long.png") // Set non-params Variables @@ -671,11 +681,11 @@ workflow AMPLISEQ { // // MODULE: Summary Report // - if (!params.skip_summary_report) { + if (!params.skip_report) { SUMMARY_REPORT ( - Channel.fromPath("${baseDir}/assets/report_template.Rmd"), - Channel.fromPath("${baseDir}/assets/nf-core_style.css"), - Channel.fromPath("${baseDir}/assets/nf-core-ampliseq_logo_light_long.png"), + ch_report_template, + ch_report_css, + ch_report_logo, !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? 
CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, From e30f1623c9dc689ab2301c4373e476457c02c2d3 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 27 Jul 2023 10:57:04 +0200 Subject: [PATCH 084/230] update .nf.test files --- conf/test_doubleprimers.config | 2 +- tests/pipeline/doubleprimers.nf.test | 3 ++- tests/pipeline/fasta.nf.test | 3 ++- tests/pipeline/iontorrent.nf.test | 3 ++- tests/pipeline/multi.nf.test | 3 ++- tests/pipeline/novaseq.nf.test | 3 ++- tests/pipeline/pacbio_its.nf.test | 3 ++- tests/pipeline/pplace.nf.test | 3 ++- tests/pipeline/reftaxcustom.nf.test | 3 ++- tests/pipeline/single.nf.test | 3 ++- tests/pipeline/sintax.nf.test | 3 ++- tests/pipeline/test.nf.test | 3 ++- 12 files changed, 23 insertions(+), 12 deletions(-) diff --git a/conf/test_doubleprimers.config b/conf/test_doubleprimers.config index 6b275dc8..75c4afab 100644 --- a/conf/test_doubleprimers.config +++ b/conf/test_doubleprimers.config @@ -23,7 +23,7 @@ params { FW_primer = "NNNNCCTAHGGGRBGCAGCAG" RV_primer = "GACTACHVGGGTATCTAATCC" double_primer = true - dada_ref_taxonomy = false + skip_dada_taxonomy = true input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_double_primer.tsv" trunc_qmin = 30 skip_fastqc = true diff --git a/tests/pipeline/doubleprimers.nf.test b/tests/pipeline/doubleprimers.nf.test index cd810025..9cbf470a 100644 --- a/tests/pipeline/doubleprimers.nf.test +++ b/tests/pipeline/doubleprimers.nf.test @@ -33,7 +33,8 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/input/table.qza").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet_double_primer.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new 
File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/fasta.nf.test b/tests/pipeline/fasta.nf.test index 9daca857..8db0826b 100644 --- a/tests/pipeline/fasta.nf.test +++ b/tests/pipeline/fasta.nf.test @@ -25,7 +25,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/dada2/ref_taxonomy.rdp_18.txt")).match("dada2") }, { assert new File("$outputDir/dada2/ASV_tax_species.rdp_18.tsv").exists() }, { assert new File("$outputDir/dada2/ASV_tax.rdp_18.tsv").exists() }, - { assert snapshot(path("$outputDir/input/ASV_seqs.fasta")).match("input") } + { assert snapshot(path("$outputDir/input/ASV_seqs.fasta")).match("input") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index 9b73af86..6a7c3a9f 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,7 +38,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index e4fe28a0..c0b099bd 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -63,7 +63,8 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() }, { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), - 
path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") } + path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/novaseq.nf.test b/tests/pipeline/novaseq.nf.test index a2101d3d..a346898d 100644 --- a/tests/pipeline/novaseq.nf.test +++ b/tests/pipeline/novaseq.nf.test @@ -28,7 +28,8 @@ nextflow_pipeline { { assert new File("$outputDir/fastqc/S2_2_fastqc.html").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet_novaseq.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index 39e1d2a2..c5314798 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -52,7 +52,8 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index b78c479b..b0507df7 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -55,7 +55,8 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() }, { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { 
assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 42e0d104..48e98fdf 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -43,7 +43,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index be236c9a..44d71baf 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -44,7 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index f6de2995..dd3d3892 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -65,7 +65,8 @@ nextflow_pipeline { { assert new 
File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() }, { assert new File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index 7b295941..b9224114 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -93,7 +93,8 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } From 2e65ec807d3d7916339f3e8839585951eead476f Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 27 Jul 2023 11:08:09 +0200 Subject: [PATCH 085/230] update README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 56e499a3..b214cc61 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,8 @@ By default, the pipeline currently performs the following: - Taxonomical classification using DADA2, [SINTAX](https://doi.org/10.1101/074161) or [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - Excludes unwanted taxa, produces absolute and relative feature/taxa count tables and plots, plots alpha rarefaction curves, computes alpha and beta diversity indices and plots thereof ([QIIME2](https://www.nature.com/articles/s41587-019-0209-9)) - Calls differentially abundant taxa ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) -- Overall pipeline run summaries 
([MultiQC](https://multiqc.info/)) +- Pipeline QC summaries ([MultiQC](https://multiqc.info/)) +- Overall pipeline html report ([R Markdown](https://github.com/rstudio/rmarkdown)) ## Usage From 13e2912c8d4131a20576803d1c0205bc5f501154 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 27 Jul 2023 13:15:08 +0200 Subject: [PATCH 086/230] fix report channels and test_pplace --- assets/report_template.Rmd | 2 +- conf/test_pplace.config | 2 +- workflows/ampliseq.nf | 48 +++++++++++++++++++------------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index c9431953..b222a0a3 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -940,7 +940,7 @@ cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax). # Header cat("## Phylogenetic Placement\n", "Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations. The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons. It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. ", - "Extraction of taxonomic classification wads performed with EPA-NG and GAPPA. ") + "Extraction of taxonomic classification was performed with EPA-NG and GAPPA. 
") # Read file and prepare table asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") diff --git a/conf/test_pplace.config b/conf/test_pplace.config index b6eaff1d..ecd5424d 100644 --- a/conf/test_pplace.config +++ b/conf/test_pplace.config @@ -24,7 +24,7 @@ params { RV_primer = "GGACTACNVGGGTWTCTAAT" input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata.tsv" - dada_ref_taxonomy = false + skip_dada_taxonomy = true qiime_ref_taxonomy = "greengenes85" filter_ssu = "bac" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 41da11e2..3da0aa28 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -76,13 +76,13 @@ if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { // report sources ch_report_template = params.report_template ? Channel.fromPath("${params.report_template}", checkIfExists: true) : - Channel.fromPath("${baseDir}/assets/report_template.Rmd") + Channel.fromPath("$projectDir/assets/report_template.Rmd") ch_report_css = params.report_css ? Channel.fromPath("${params.report_css}", checkIfExists: true) : - Channel.fromPath("${baseDir}/assets/nf-core_style.css") + Channel.fromPath("$projectDir/assets/nf-core_style.css") ch_report_logo = params.report_logo ? Channel.fromPath("${params.report_logo}", checkIfExists: true) : - Channel.fromPath("${baseDir}/assets/nf-core-ampliseq_logo_light_long.png") + Channel.fromPath("$projectDir/assets/nf-core-ampliseq_logo_light_long.png") // Set non-params Variables @@ -710,31 +710,31 @@ workflow AMPLISEQ { ch_unfiltered_fasta.ifEmpty( [] ), // this is identical to DADA2_MERGE.out.fasta if !is_fasta_input DADA2_MERGE.out.dada2asv.ifEmpty( [] ), DADA2_MERGE.out.dada2stats.ifEmpty( [] ), - !params.skip_barrnap ? BARRNAPSUMMARY.out.summary : [], - params.filter_ssu ? FILTER_SSU.out.stats : [], - params.filter_ssu ? 
FILTER_SSU.out.asv : [], - params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats : [], - params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig : [], - params.filter_codons ? FILTER_CODONS.out.stats : [], - params.cut_its != "none" ? ITSX_CUTASV.out.summary : [], - !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax : [], - !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? DADA2_TAXONOMY_WF.out.cut_tax : [[],[]], - !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax : [], - !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax : [], - !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv : [], + !params.skip_barrnap ? BARRNAPSUMMARY.out.summary.ifEmpty( [] ) : [], + params.filter_ssu ? FILTER_SSU.out.stats.ifEmpty( [] ) : [], + params.filter_ssu ? FILTER_SSU.out.asv.ifEmpty( [] ) : [], + params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.stats.ifEmpty( [] ) : [], + params.min_len_asv || params.max_len_asv ? FILTER_LEN_ASV.out.len_orig.ifEmpty( [] ) : [], + params.filter_codons ? FILTER_CODONS.out.stats.ifEmpty( [] ) : [], + params.cut_its != "none" ? ITSX_CUTASV.out.summary.ifEmpty( [] ) : [], + !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? ch_dada2_tax.ifEmpty( [] ) : [], + !params.skip_taxonomy && params.dada_ref_taxonomy && !params.skip_dada_taxonomy ? DADA2_TAXONOMY_WF.out.cut_tax.ifEmpty( [[],[]] ) : [[],[]], + !params.skip_taxonomy && params.sintax_ref_taxonomy ? ch_sintax_tax.ifEmpty( [] ) : [], + !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], + !params.skip_taxonomy && params.pplace_tree ? 
FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", - run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? FILTER_STATS.out.tsv : [], - run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder : [], + run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? FILTER_STATS.out.tsv.ifEmpty( [] ) : [], + run_qiime2 && !params.skip_barplot ? QIIME2_BARPLOT.out.folder.ifEmpty( [] ) : [], run_qiime2 && !params.skip_abundance_tables ? "done" : "", - run_qiime2 && !params.skip_alpha_rarefaction ? "done" : "", - run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.depth : [], - run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.beta.collect() : [], - run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect() : [], - run_qiime2 && !params.skip_ancom && params.metadata ? QIIME2_ANCOM.out.ancom.collect() : [], - params.picrust ? PICRUST.out.pathways : [] + run_qiime2 && !params.skip_alpha_rarefaction && params.metadata ? "done" : "", + run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.depth.ifEmpty( [] ) : [], + run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.beta.collect().ifEmpty( [] ) : [], + run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect().ifEmpty( [] ) : [], + run_qiime2 && !params.skip_ancom && params.metadata ? 
QIIME2_ANCOM.out.ancom.collect().ifEmpty( [] ) : [], + params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [] ) ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) } From 0772afd23551a69fe010e71a121394f30c672cc0 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 27 Jul 2023 13:50:05 +0200 Subject: [PATCH 087/230] fix nf-test doubleprimers --- tests/pipeline/doubleprimers.nf.test | 2 -- tests/pipeline/doubleprimers.nf.test.snap | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/pipeline/doubleprimers.nf.test b/tests/pipeline/doubleprimers.nf.test index 9cbf470a..5d641077 100644 --- a/tests/pipeline/doubleprimers.nf.test +++ b/tests/pipeline/doubleprimers.nf.test @@ -29,8 +29,6 @@ nextflow_pipeline { path("$outputDir/dada2/DADA2_stats.tsv"), path("$outputDir/dada2/DADA2_table.rds"), path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, - { assert new File("$outputDir/qiime2/input/rep-seqs.qza").exists() }, - { assert new File("$outputDir/qiime2/input/table.qza").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet_double_primer.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index 64ddaa21..cefcf1b9 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,9 +13,9 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, 
CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], - "timestamp": "2023-05-28T21:08:54+0000" + "timestamp": "2023-07-27T13:49:03+0000" }, "overall_summary_tsv": { "content": [ From a461ddc415ef85e837cfa3b1d1095ce0d98ad6d1 Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Fri, 28 Jul 2023 14:54:26 +0200 Subject: [PATCH 088/230] Apply suggestions from code review Co-authored-by: WackerO <43847497+WackerO@users.noreply.github.com> --- assets/report_template.Rmd | 12 ++++++------ docs/output.md | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index b222a0a3..5852615e 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -164,9 +164,9 @@ cat("## Primer removal with Cutadapt\n") cat("Cutadapt is trimming primer sequences from sequencing reads. ", "Primer sequences are non-biological sequences that often introduce ", "point mutations that do not reflect sample sequences. This is especially ", - "true for degenerated PCR primer. If primer trimming would be omitted, artifactual ", + "true for degenerated PCR primer. 
If primer trimming were to be omitted, artifactual ", "amplicon sequence variants might be computed by the denoising tool or ", - "sequences might be lost due to become labelled as PCR chimera.\n\n") + "sequences might be lost due to being labelled as PCR chimera.\n\n") # import tsv cutadapt_summary <- read.table(file = params$ca_sum_path, header = TRUE, sep = "\t") @@ -1012,7 +1012,7 @@ cat("# Downstream analysis with QIIME2\n", cat("## ASV filtering\n", "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA, for 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. ") if ( params$exclude_taxa != "none" ) { - cat("ASVs were removed when the taxonomic string contanied any of '", params$exclude_taxa, "' (comma separated)") + cat("ASVs were removed when the taxonomic string contained any of '", params$exclude_taxa, "' (comma separated)") } if ( params$min_frequency != 1 ) { cat(", had fewer than", params$min_frequency ,"total read counts over all sample") @@ -1077,7 +1077,7 @@ for (category in metadata_category_barplot) { ```{r, eval = !isFALSE(params$alpha_rarefaction), results='asis'} cat("## Alpha diversity rarefaction curves\n", - "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not becomes horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ") + "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. 
If the slope of the curves does not level out and the lines do not become horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ") # warning if dada_sample_inference is independent, because alpha diversities are not expected to be accurate! if ( params$dada_sample_inference == "independent") { cat("Please note that ASVs were inferred for each sample independently, that can make alpha diversity indices a poor estimate of true diversity. ") @@ -1097,13 +1097,13 @@ if ( params$dada_sample_inference == "independent") { cat("Please note that ASVs were inferred for each sample independently, that can make alpha diversity indices a poor estimate of true diversity. ") } cat("This step calculates alpha diversity using various methods and performs pairwise comparisons of groups of samples. It is based on a phylogenetic tree of all ASV sequences. ", - "Folder [qiime2/diversity/alpha_diversity](../qiime2/diversity/alpha_diversity) contains the alpha-diverity data:\n", + "Folder [qiime2/diversity/alpha_diversity](../qiime2/diversity/alpha_diversity) contains the alpha-diversity data:\n", "- Shannon’s diversity index (quantitative): [qiime2/diversity/alpha_diversity/shannon_vector/index.html](../qiime2/diversity/alpha_diversity/shannon_vector/index.html)\n", "- Pielou’s Evenness: [qiime2/diversity/alpha_diversity/evenness_vector/index.html](../qiime2/diversity/alpha_diversity/evenness_vector/index.html)\n", "- Faith’s Phylogenetic Diversity (qualitiative, phylogenetic) [qiime2/diversity/alpha_diversity/faith_pd_vector/index.html](../qiime2/diversity/alpha_diversity/faith_pd_vector/index.html)\n", "- Observed OTUs (qualitative): [qiime2/diversity/alpha_diversity/observed_otus_vector/index.html](../qiime2/diversity/alpha_diversity/observed_otus_vector/index.html)\n", "\n### Beta diversity indices\n", - "Beta diversity measures the species community 
differences between samples. This step calculates beta diversity distances using various methods and performs pairwise comparisons of groups of samples. Additionally, principle coordinates analysis (PCoA) plots are produced that can be visualized with Emperor in your default browser without the need for installation. This calculations are based on a phylogenetic tree of all ASV sequences. ", + "Beta diversity measures the species community differences between samples. This step calculates beta diversity distances using various methods and performs pairwise comparisons of groups of samples. Additionally, principle coordinates analysis (PCoA) plots are produced that can be visualized with Emperor in your default browser without the need for installation. These calculations are based on a phylogenetic tree of all ASV sequences. ", "Folder [qiime2/diversity/beta_diversity](../qiime2/diversity/beta_diversity) contains the beta-diverity data:\n", "1 PCoA for four different beta diversity distances are accessible via:\n", "- Bray-Curtis distance (quantitative): [qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html)\n", diff --git a/docs/output.md b/docs/output.md index 7ee31ef5..963c43b3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -61,14 +61,14 @@ Samplesheet, ASV fasta, and metadata file are copied into the results folder. ### Html summary -A summary report for most pipeline results in html format produced by [R Markdown](https://rmarkdown.rstudio.com/). The report gives a generl overview of the analysis, includes many tables and visualizations, and links to interactive downstream analysis results, if available. +A summary report for most pipeline results in html format produced by [R Markdown](https://rmarkdown.rstudio.com/). 
The report gives a general overview of the analysis, includes many tables and visualizations, and links to interactive downstream analysis results, if available.
    Output files - `summary_report/` - `summary_report.html`: a standalone HTML file that can be viewed in your web browser. - - `*.svg*`: plots that were produced for (and are included) in the report. + - `*.svg*`: plots that were produced for (and are included in) the report. - `versions.yml`: software versions used to produce this report.
    From f7326886747df81e41fd2738efbc3bbed17b5e43 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 31 Jul 2023 16:08:03 +0200 Subject: [PATCH 089/230] add methods section and improve layout --- assets/report_template.Rmd | 141 ++++++++++++++++++++++++++++--------- 1 file changed, 107 insertions(+), 34 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 5852615e..1c05a17e 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -118,6 +118,8 @@ cat(paste0(" ")) ``` + + ```{r} if ( endsWith( params$workflow_manifest_version, "dev") ) { ampliseq_version = paste0("version ", params$workflow_manifest_version, ", revision ", params$workflow_scriptid) @@ -133,18 +135,22 @@ title: " + # Abstract The bioinformatics analysis pipeline [nfcore/ampliseq](https://nf-co.re/ampliseq) is used for amplicon sequencing, supporting denoising of any amplicon and supports a variety of taxonomic databases for taxonomic assignment of 16S, ITS, CO1 and 18S amplicons. + + ```{r, eval = !isFALSE(params$mqc_plot) || !isFALSE(params$dada_filtntrim_args), results='asis'} cat("# Preprocessing\n") ``` -```{r, eval = !isFALSE(params$mqc_plot), results='asis'} -mqc_rep_path <- paste0("../multiqc/multiqc_report.html") + +```{r, eval = !isFALSE(params$mqc_plot), results='asis'} cat("## FastQC\n") cat("FastQC gives general quality metrics about your sequenced reads. ", "It provides information about the quality score distribution across your reads, ", @@ -152,13 +158,15 @@ cat("FastQC gives general quality metrics about your sequenced reads. ", cat("The sequence quality was checked using FastQC and resulting data was ", "aggregated using the FastQC module of MultiQC. 
For more quality ", "controls and per sample quality checks you can check the full ", - "MultiQC report, which is found [here](", mqc_rep_path, ").", sep = "") + "MultiQC report, which can be found in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html).", sep = "") ``` ```{r, eval = !isFALSE(params$mqc_plot), out.width='100%', dpi=1200, fig.align='center'} knitr::include_graphics(params$mqc_plot) ``` + + ```{r, eval = !isFALSE(params$ca_sum_path), results='asis'} cat("## Primer removal with Cutadapt\n") cat("Cutadapt is trimming primer sequences from sequencing reads. ", @@ -200,6 +208,8 @@ datatable(cutadapt_summary, options = list( cat("\n\nCutadapt results can be found in folder [cutadapt](../cutadapt).") ``` + + ```{r, eval = !isFALSE(params$dada_filtntrim_args), results='asis'} cat("## Quality filtering using DADA2\n\n") cat("Additional quality filtering can improve sequence recovery. ", @@ -211,7 +221,9 @@ if (params$trunc_qmin) { f_and_tr_args), ", ") tr_len_f <- trunc_len[[1]][1] tr_len_r <- trunc_len[[1]][2] - cat("Reads were trimmed before median quality drops ", + cat("Reads were trimmed to a specific length and the length cutoff was ", + "automatically determined by the median quality of all input reads. ", + "Reads were trimmed before median quality drops ", "below ", params$trunc_qmin, " and at least ",params$trunc_rmin*100, "% of reads are retained, resulting in a trim of ", "forward reads at ", tr_len_f, " bp and reverse ", @@ -232,6 +244,8 @@ cat("Reads with more than", params$max_ee,"expected errors were discarded.", "column 'filtered'.", sep = " ") ``` + + ```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'} cat ("**Quality profiles:**\n\n") @@ -270,9 +284,11 @@ if (params$flag_single_end) { cat("Overall read quality profiles as heat map of the frequency of each quality score at each base position. 
", "The mean quality score at each position is shown by the green line, and the quartiles of the quality score ", "distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least ", - "that position. Original plots can be found [folder dada2/QC/](../dada2/QC/) with names that end in '_qual_stats.pdf'.") + "that position. Original plots can be found [folder dada2/QC/](../dada2/QC/) with names that end in `_qual_stats.pdf`.") ``` + + ```{r, eval = !isFALSE(params$dada_err_path) || !isFALSE(params$dada_stats_path) || !isFALSE(params$asv_table_path), results='asis'} cat("# ASV inference using DADA2\n\n", "DADA2 performs fast and accurate sample inference from amplicon data with single-nucleotide resolution. @@ -282,6 +298,8 @@ cat("# ASV inference using DADA2\n\n", read pair merging (for paired end Illumina reads only) and PCR chimera removal.") ``` + + ```{r, eval = !isFALSE(params$dada_err_path), results='asis'} cat("## Error correction\n\n", "Read error correction was performed using estimated error rates, visualized below.\n") @@ -314,9 +332,11 @@ cat("Estimated error rates for each possible transition. The black line shows th convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates (points), and the error rates should drop with increased quality. Original plots can be found in - [folder dada2/QC/](../dada2/QC/) with names that end in '.err.pdf'.") + [folder dada2/QC/](../dada2/QC/) with names that end in `.err.pdf`.") ``` + + ```{r, eval = !isFALSE(params$dada_stats_path), results='asis'} cat("## Read counts per sample\n\n", "Tracking read numbers through DADA2 processing steps, for each sample. 
In the following table are read numbers after each processing stage.\n") @@ -339,9 +359,9 @@ datatable(dada_stats, options = list( scrollY = "300px", paging = FALSE)) -cat("Samples with unusual low reads numbers relative to the number of expected ASVs (e.g. 500 reads with 100 ASVs) - should be treated cautiously, because the abundance estimate will be very granular and might vary strongly between (theoretical) - replicates due to high impact of stochasticity. ") +cat("Samples with unusual low reads numbers relative to the number of expected ASVs + should be treated cautiously, because the abundance estimate will be very granular + and might vary strongly between (theoretical) replicates due to high impact of stochasticity. ") # Stacked barchart to num of reads @@ -409,15 +429,6 @@ if ( params$flag_single_end ) { } cat(":\n\n") -# Plot -#dada_stats_ex_t$steps_t <- factor(dada_stats_ex_t$steps_t, levels=unique(dada_stats_ex_t$steps_t)) -#ggplot(dada_stats_ex_t, aes(fill = steps_t, y = asvs_abs_t, x = samples_t)) + -# geom_bar(position = "stack", stat = "identity") + -# xlab("Samples") + -# ylab("Absolute reads") + -# coord_flip() + -# scale_fill_brewer("Filtering Steps", palette = "Spectral") - # Plot dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stats_p_t$steps_t)) @@ -439,6 +450,8 @@ cat("\n\nBetween",min(dada_stats_p$analysis),"% and",max(dada_stats_p$analysis), (e.g. during nucleotide extraction, library preparation, or sequencing) could have occurred that might add bias to the analysis.") ``` + + ```{r, eval = !isFALSE(params$asv_table_path), results='asis'} cat("## Inferred ASVs\n\n") @@ -449,10 +462,10 @@ n_asv <- length(asv_table$ASV_ID) # Output text cat("Finally,", n_asv, "amplicon sequence variants (ASVs) were obtained across all samples. ") -cat("The ASVs can be found in ['dada2/ASV_seqs.fasta'](../dada2/). And the corresponding", +cat("The ASVs can be found in [`dada2/ASV_seqs.fasta`](../dada2/). 
And the corresponding", " quantification of the ASVs across samples is in", - "['dada2/ASV_table.tsv'](../dada2/). An extensive table containing both was ", - "saved as ['dada2/DADA2_table.tsv'](../dada2/). ") + "[`dada2/ASV_table.tsv`](../dada2/). An extensive table containing both was ", + "saved as [`dada2/DADA2_table.tsv`](../dada2/). ") if ( params$dada_sample_inference == "independent" ) { cat("ASVs were inferred for each sample independently.") } else if ( params$dada_sample_inference == "pooled" ) { @@ -466,10 +479,14 @@ if ( params$dada_sample_inference == "independent" ) { flag_any_filtering <- !isFALSE(params$path_barrnap_sum) || !isFALSE(params$filter_len_asv) || !isFALSE(params$filter_codons) ``` + + ```{r, eval = flag_any_filtering, results='asis'} cat("# Filtering of ASVs\n") ``` + + ```{r, eval = !isFALSE(params$path_barrnap_sum), results='asis'} cat("## rRNA detection\n") cat("Barrnap classifies the ASVs into the origin domain (including mitochondiral origin).\n\n", sep = "") @@ -520,9 +537,11 @@ svg("rrna_detection_with_barrnap.svg") plot_barrnap_df_sum invisible(dev.off()) -cat("\n\nrRNA filter results can be found in folder [barrnap](../barrnap).") +cat("\n\nrRNA classification results can be found in folder [barrnap](../barrnap).") ``` + + ```{r, eval = !isFALSE(params$path_barrnap_sum) && !isFALSE(params$filter_ssu), results='asis'} cat("\n\nASVs were filtered for (",params$filter_ssu,") using the above classification.", "The following table shows read counts for each sample before and after filtering:\n\n", sep = "") @@ -551,6 +570,8 @@ cat("In average", round(filter_ssu_stats_avg_removed,2), "% reads were removed, cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-round( filter_ssu_asv_filtered/n_asv*100 ,2),"%), from",n_asv,"to",filter_ssu_asv_filtered," ASVs.") ``` + + ```{r, eval = !isFALSE(params$filter_len_asv_len_orig), results='asis'} cat("## Sequence length\n") @@ -630,6 +651,8 @@ cat("The number of ASVs 
was reduced by",sum(filter_len_profile$Counts)-sum(filte cat("\n\nLength filter results can be found in folder [asv_length_filter](../asv_length_filter).") ``` + + ```{r, eval = !isFALSE(params$filter_codons), results='asis'} cat("## Codon usage\n") @@ -653,6 +676,8 @@ datatable(filter_codons_stats, options = list( cat("\n\nCodon usage filter results can be found in folder [codon_filter](../codon_filter).") ``` + + ```{r, results='asis'} # Check if any taxonomic classification is available any_taxonomy <- !isFALSE(params$dada2_taxonomy) || !isFALSE(params$qiime2_taxonomy) || !isFALSE(params$sintax_taxonomy) || !isFALSE(params$pplace_taxonomy) @@ -663,6 +688,8 @@ any_taxonomy <- !isFALSE(params$dada2_taxonomy) || !isFALSE(params$qiime2_taxono cat("# Taxonomic Classification\n") ``` + + ```{r, eval = !isFALSE(params$cut_its), results='asis'} cat("## ITS region\n") cat("The ITS region was extracted from each ASV sequence using ITSx.", @@ -715,6 +742,8 @@ invisible(dev.off()) cat("\n\nITSx results can be found in folder [itsx](../itsx).") ``` + + ```{r, eval = !isFALSE(params$dada2_taxonomy), results='asis'} cat("## DADA2\n") @@ -805,9 +834,11 @@ svg("dada2_taxonomic_classification_per_taxonomy_level.svg") plot_asv_classi_df invisible(dev.off()) -cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in files 'ASV_tax_*.tsv'.") +cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in files `ASV_tax_*.tsv`.") ``` + + ```{r, eval = !isFALSE(params$qiime2_taxonomy), results='asis'} # Header cat("## QIIME2\n") @@ -874,6 +905,8 @@ invisible(dev.off()) cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](../qiime2/taxonomy).") ``` + + ```{r, eval = !isFALSE(params$sintax_taxonomy), results='asis'} # Header cat("## SINTAX\n") @@ -936,6 +969,8 @@ invisible(dev.off()) cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax).") ``` + + ```{r, eval = !isFALSE(params$pplace_taxonomy), 
results='asis'} # Header cat("## Phylogenetic Placement\n", @@ -998,9 +1033,11 @@ knitr::include_graphics(c(params$pplace_heattree)) ``` ```{r, eval = !isFALSE(params$pplace_taxonomy), results='asis'} -cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [pplace](../pplace) in file '*.taxonomy.per_query_unique.tsv'.") +cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [pplace](../pplace) in file `*.taxonomy.per_query_unique.tsv`.") ``` + + ```{r, eval = !isFALSE(params$val_used_taxonomy), results='asis'} # Header cat("# Downstream analysis with QIIME2\n", @@ -1008,11 +1045,13 @@ cat("# Downstream analysis with QIIME2\n", "Results of taxonomic classification of",params$val_used_taxonomy,"was used in all following analysis, see in the above sections.") ``` + + ```{r, eval = !isFALSE(params$filter_stats_tsv), results='asis'} cat("## ASV filtering\n", "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA, for 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. ") if ( params$exclude_taxa != "none" ) { - cat("ASVs were removed when the taxonomic string contained any of '", params$exclude_taxa, "' (comma separated)") + cat("ASVs were removed when the taxonomic string contained any of `", params$exclude_taxa, "` (comma separated)", sep="") } if ( params$min_frequency != 1 ) { cat(", had fewer than", params$min_frequency ,"total read counts over all sample") @@ -1047,6 +1086,8 @@ datatable(filter_stats_tsv, options = list( cat("\n\nTables with read count numbers and filtered abundance tables are in folder [qiime2/abundance_tables](../qiime2/abundance_tables).") ``` + + ```{r, eval = !isFALSE(params$abundance_tables), results='asis'} cat("## Abundance tables\n", "The abundance tables are the final data for further downstream analysis and visualisations. 
The tables are based on the computed ASVs and taxonomic classification, but after removal of unwanted taxa. ", @@ -1057,6 +1098,8 @@ cat("\n\n## Relative abundance tables\n", "Folder [qiime2/rel_abundance_tables](../qiime2/rel_abundance_tables) contains tap-separated files (.tsv) that can be opened by any spreadsheet software.") ``` + + ```{r, eval = !isFALSE(params$barplot), results='asis'} cat("## Barplot\n", "Interactive abundance plot that aids exploratory browsing the discovered taxa and their abundance", @@ -1067,7 +1110,7 @@ cat("## Barplot\n", ```{r, eval = !isFALSE(params$metadata_category_barplot), results='asis'} cat("\n\nAdditionally, barplots with average relative abundance values were produced for", params$metadata_category_barplot,"(comma separated if several) in [qiime2/barplot_average](../qiime2/barplot_average) - in separate folders following the scheme 'barplot_{treatment}':\n") + in separate folders following the scheme `barplot_{treatment}`:\n") metadata_category_barplot <- sort( unlist( strsplit( params$metadata_category_barplot,"," ) ) ) for (category in metadata_category_barplot) { barplot_folder_path <- paste0("qiime2/barplot_average/barplot_",category) @@ -1075,6 +1118,8 @@ for (category in metadata_category_barplot) { } ``` + + ```{r, eval = !isFALSE(params$alpha_rarefaction), results='asis'} cat("## Alpha diversity rarefaction curves\n", "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not become horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. 
") @@ -1085,6 +1130,8 @@ if ( params$dada_sample_inference == "independent") { cat("Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click [qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser.") ``` + + ```{r, eval = !isFALSE(params$diversity_indices_beta), results='asis'} diversity_indices_depth <- readLines(params$diversity_indices_depth) @@ -1110,7 +1157,7 @@ cat("This step calculates alpha diversity using various methods and performs pai "- Jaccard distance (qualitative): [qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html)\n", "- unweighted UniFrac distance (qualitative, phylogenetic) [qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html)\n", "- weighted UniFrac distance (quantitative, phylogenetic): [qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html)\n", - "2 Pairwise comparisons between groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each significance test result is in its separate folder following the scheme '{method}_distance_matrix-{treatment}':", + "2 Pairwise comparisons between groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). 
Each significance test result is in its separate folder following the scheme `{method}_distance_matrix-{treatment}`:", sep = "\n") diversity_indices_beta <- sort( unlist( strsplit( params$diversity_indices_beta,"," ) ) ) for (folder in diversity_indices_beta) { @@ -1123,13 +1170,13 @@ for (folder in diversity_indices_beta) { cat("_ADONIS test for beta diversity_\n\n") cat("Permutational multivariate analysis of variance using distance matrices (adonis) determines whether groups of samples are significantly different from one another. - The formula was '",params$qiime_adonis_formula,"' (multiple formulas are comma separated). + The formula was `",params$qiime_adonis_formula,"` (multiple formulas are comma separated). adonis computes an R2 value (effect size) which shows the percentage of variation explained by a condition, as well as a p-value to determine the statistical significance. The sequence of conditions in the formula matters, the variance of factors is removed (statistically controlled for) from beginning to end of the formula. " ) -cat("\n\nTest results are in separate folders following the scheme '{method}_distance_matrix-{adonis formula}':\n") +cat("\n\nTest results are in separate folders following the scheme `{method}_distance_matrix-{adonis formula}`:\n") diversity_indices_adonis <- sort( unlist( strsplit( params$diversity_indices_adonis,"," ) ) ) for (folder in diversity_indices_adonis) { adonis_index_path <- paste0("qiime2/diversity/",folder) #"beta_diversity/" is defined in input section with "stageAs: 'beta_diversity/adonis/*'" @@ -1137,6 +1184,8 @@ for (folder in diversity_indices_adonis) { } ``` + + ```{r, eval = !isFALSE(params$ancom), results='asis'} cat("## ANCOM\n\n") cat("Analysis of Composition of Microbiomes (ANCOM) is applied to identify features that are differentially @@ -1146,7 +1195,7 @@ cat("Analysis of Composition of Microbiomes (ANCOM) is applied to identify featu [qiime2/ancom/](../qiime2/ancom/). 
", sep = "\n") -cat("\n\nTest results are in separate folders following the scheme 'Category-{treatment}-{taxonomic level}':\n") +cat("\n\nTest results are in separate folders following the scheme `Category-{treatment}-{taxonomic level}`:\n") ancom <- sort( unlist( strsplit( params$ancom,"," ) ) ) for (folder in ancom) { ancom_path <- paste0("qiime2/ancom/",folder) @@ -1154,22 +1203,46 @@ for (folder in ancom) { } ``` + + ```{r, eval = !isFALSE(params$picrust_pathways), results='asis'} cat("## PICRUSt2\n", "PICRUSt2 (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences.", "Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology predictions were made for each sample.", - "In folder [PICRUSt2/](../PICRUSt2/) are predicted quantifications for Enzyme Classification numbers (EC), see 'EC_pred_metagenome_unstrat_descrip.tsv', KEGG orthologs (KO), see 'KO_pred_metagenome_unstrat_descrip.tsv', MetaCyc ontology, see 'METACYC_path_abun_unstrat_descrip.tsv'.", + "In folder [PICRUSt2/](../PICRUSt2/) are predicted quantifications for Enzyme Classification numbers (EC), see `EC_pred_metagenome_unstrat_descrip.tsv`, KEGG orthologs (KO), see `KO_pred_metagenome_unstrat_descrip.tsv`, MetaCyc ontology, see `METACYC_path_abun_unstrat_descrip.tsv`.", "Quantifications are not normalized yet, they can be normalized e.g. by the total sum per sample.", sep = "\n") ``` + + +# Methods + +```{r, results='asis'} +if ( !isFALSE(params$mqc_plot) ) { + # with MultiQC + cat("MultiQC summarized computational methods in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html). 
+ The proposed short methods description can be found in [MultiQC's Methods Description](../multiqc/multiqc_report.html#nf-core-ampliseq-methods-description), + versions of software collected at runtime in [MultiQC's Software Versions](../multiqc/multiqc_report.html#software_versions), + and a summary of non-default parameter in [MultiQC's Workflow Summary](../multiqc/multiqc_report.html#nf-core-ampliseq-summary).\n\n") +} +# with & without MultiQC +cat("Technical information to the pipeline run are collected in folder [pipeline_info](../pipeline_info), + including software versions collected at runtime in file `software_versions.yml` (can be viewed with a text editor), + execution report in file `execution_report_{date}_{time}.html`, + execution trace in file `execution_trace_{date}_{time}.txt`, + execution timeline in file `execution_timeline_{date}_{time}.html`, and + pipeline direct acyclic graph (DAG) in file `pipeline_dag_{date}_{time}.html`.") +``` + + + # Final notes -This report (file 'summary_report.html') is located in folder [summary_report](.) of the original pipeline results folder. +This report (file `summary_report.html`) is located in folder [summary_report](.) of the original pipeline results folder. In this file, all links to files and folders are relative, therefore hyperlinks will only work when the report is at its original place in the pipeline results folder. Plots specifically produced for this report (if any) can be also found in folder [summary_report](.). -A comprehensive read count report throughout the pipeline can be found in the [base results folder](../) in file 'overall_summary.tsv'. -Technical information to the pipeline run are collected in folder [pipeline_info](../pipeline_info). +A comprehensive read count report throughout the pipeline can be found in the [base results folder](../) in file `overall_summary.tsv`. 
Please cite the [pipeline publication](https://doi.org/10.3389/fmicb.2020.550420) and any software tools used by the pipeline (see [citations](https://nf-co.re/ampliseq#citations)) when you use any of the pipeline results in your study. From ea20a1cf51f7754e2b1555389641407d932e218e Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 31 Jul 2023 17:06:42 +0200 Subject: [PATCH 090/230] add input section --- assets/report_template.Rmd | 42 +++++++++++++++++++++++++++++++++ modules/local/summary_report.nf | 8 ++++++- workflows/ampliseq.nf | 3 +++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 1c05a17e..da681919 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -12,6 +12,8 @@ output: date: "`r Sys.Date()`" #bibliography: ./references.bibtex params: + # any parameter that is by default "FALSE" is used to evaluate the inclusion of a codeblock with e.g. "eval=!isFALSE(params$mqc_plot)" + # report style css: NULL logo: NULL @@ -51,6 +53,10 @@ params: qiime_adonis_formula: FALSE # file paths + metadata: FALSE + samplesheet: FALSE + fasta: FALSE + input: FALSE mqc_plot: FALSE ca_sum_path: FALSE dada_filtntrim_args: FALSE @@ -142,6 +148,42 @@ subtitle: `r report_subtitle` The bioinformatics analysis pipeline [nfcore/ampliseq](https://nf-co.re/ampliseq) is used for amplicon sequencing, supporting denoising of any amplicon and supports a variety of taxonomic databases for taxonomic assignment of 16S, ITS, CO1 and 18S amplicons. + + +# Input + +Pipeline input was saved in folder [input](../input). 
+ +```{r, results='asis'} +if ( !isFALSE(params$samplesheet) ) { + # samplesheet input + cat("Sequencing data was provided in the samplesheet file `", params$samplesheet, "` that is displayed below:", sep="") + + samplesheet <- read.table(file = params$samplesheet, header = TRUE, sep = "\t") + # Display table + datatable(samplesheet, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) +} else if ( !isFALSE(params$fasta) ) { + # fasta input + cat("ASV/OTU sequences were provided in the fasta file `", params$fasta, "`. ", sep="") +} else if ( !isFALSE(params$input) ) { + # folder input + cat("Sequencing data was retrieved from folder `", params$input, "`. ", sep="") +} +if ( !isFALSE(params$metadata) ) { + cat("Metadata associated with the sequencing data was provided in `", params$metadata, "` and is displayed below:", sep="") + + metadata <- read.table(file = params$metadata, header = TRUE, sep = "\t") + # Display table + datatable(metadata, options = list( + scrollX = TRUE, + scrollY = "300px", + paging = FALSE)) +} +``` + ```{r, eval = !isFALSE(params$mqc_plot) || !isFALSE(params$dada_filtntrim_args), results='asis'} diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 5ffe6d05..a9d2f8c6 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -1,5 +1,4 @@ process SUMMARY_REPORT { - label 'process_low' container 'docker.io/tillenglert/ampliseq_report:latest' @@ -20,6 +19,9 @@ process SUMMARY_REPORT { path(report_template) path(report_styles) path(report_logo) + path(metadata) + path(samplesheet) + path(fasta) path(mqc_plots) path(ca_summary) val(find_truncation_values) @@ -76,6 +78,10 @@ process SUMMARY_REPORT { "workflow_manifest_version='${workflow.manifest.version}'", "workflow_scriptid='${workflow.scriptId.substring(0,10)}'", meta.single_end ? "flag_single_end=TRUE" : "", + metadata ? "metadata='$metadata'" : "", + samplesheet ? "samplesheet='$samplesheet'" : "", + fasta ? 
"fasta='$fasta'" : "", + !fasta && !samplesheet ? "input='$params.input'" : "", mqc_plots ? "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'" : "", ca_summary ? params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,ca_sum_path='$ca_summary'" : diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3da0aa28..1a4d2970 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -686,6 +686,9 @@ workflow AMPLISEQ { ch_report_template, ch_report_css, ch_report_logo, + ch_metadata.ifEmpty( [] ), + params.input.toString().toLowerCase().endsWith("tsv") ? ch_input : [], // samplesheet input + is_fasta_input ? PARSE_INPUT.out.fasta.ifEmpty( [] ) : [], // fasta input !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, From f81dfd95aaa5b8b426f9a19c42520b846af99326 Mon Sep 17 00:00:00 2001 From: Till Englert Date: Wed, 2 Aug 2023 06:56:04 +0200 Subject: [PATCH 091/230] Add functional Biocontainer --- modules/local/summary_report.nf | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index a9d2f8c6..86a058c4 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -1,19 +1,10 @@ process SUMMARY_REPORT { label 'process_low' - container 'docker.io/tillenglert/ampliseq_report:latest' - /* this is from https://github.com/nf-core/modules/blob/master/modules/nf-core/rmarkdownnotebook/main.nf but doesnt work - conda "conda-forge::r-base=4.1.0 conda-forge::r-rmarkdown=2.9 conda-forge::r-yaml=2.2.1" + conda "conda-forge::r-base=4.2.3 conda-forge::r-rmarkdown=2.22 conda-forge::r-tidyverse=2.0.0 conda-forge::r-knitr=1.43 conda-forge::r-dt=0.28 conda-forge::r-dtplyr=1.3.1 
conda-forge::r-formattable=0.2.1 conda-forge::r-purrr=1.0.1 conda-forge::r-vegan=2.6_4 conda-forge::r-optparse=1.7.3 conda-forge::r-ggplot2=3.4.2 conda-forge::r-dplyr=1.1.2 conda-forge::r-data.table=1.14.8 conda-forge::r-patchwork=1.1.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' : - 'biocontainers/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' }" - */ - /* this is from https://github.com/BioContainers/multi-package-containers/pull/2663 but doesnt work: /usr/local/bin/pandoc: error while loading shared libraries: libgmp.so.10: cannot open shared object file: No such file or directory - conda "conda-forge::r-base=4.2.3 conda-forge::r-rmarkdown=2.22 conda-forge::r-tidyverse=2.0.0 conda-forge::r-knitr=1.43 conda-forge::r-dt=0.28 conda-forge::r-dtplyr=1.3.1 conda-forge::r-formattable=0.2.1 conda-forge::r-purrr=1.0.1 conda-forge::r-vegan=2.6_4 conda-forge::r-optparse=1.7.3 conda-forge::r-ggplot2=3.4.2 conda-forge::r-dplyr=1.1.2 conda-forge::r-data.table=1.14.8 conda-forge::pandoc=2.19.2 conda-forge::r-patchwork=1.1.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-6726188ca70388ddb300400dc7fe71101a4f89f2:0346b3395cd017327aff8dae37aad0a027a7613c-0' : - 'biocontainers/mulled-v2-6726188ca70388ddb300400dc7fe71101a4f89f2:0346b3395cd017327aff8dae37aad0a027a7613c-0' }" - */ + 'https://depot.galaxyproject.org/singularity/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' : + 'biocontainers/mulled-v2-b2ec1fea5791d428eebb8c8ea7409c350d31dada:a447f6b7a6afde38352b24c30ae9cd6e39df95c4-1' }" input: path(report_template) From 04fd6a1fc606c98de94c18ecb211690dbf8c317f Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 2 Aug 2023 16:03:31 +0200 Subject: [PATCH 092/230] implement internal review suggestions --- assets/report_template.Rmd | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index da681919..7929505c 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -150,11 +150,15 @@ supporting denoising of any amplicon and supports a variety of taxonomic databas -# Input - -Pipeline input was saved in folder [input](../input). - ```{r, results='asis'} +if ( !isFALSE(params$metadata) ) { + cat("# Input and metadata\n\n", + "Pipeline input was saved in folder [input](../input).\n\n") +} else { + cat("# Input\n\n", + "Pipeline input was saved in folder [input](../input).\n\n") +} + if ( !isFALSE(params$samplesheet) ) { # samplesheet input cat("Sequencing data was provided in the samplesheet file `", params$samplesheet, "` that is displayed below:", sep="") @@ -401,9 +405,13 @@ datatable(dada_stats, options = list( scrollY = "300px", paging = FALSE)) -cat("Samples with unusual low reads numbers relative to the number of expected ASVs - should be treated cautiously, because the abundance estimate will be very granular - and might vary strongly between (theoretical) replicates due to high impact of stochasticity. 
") +cat(paste0("*** +Samples with unusual low reads numbers relative to the number of expected ASVs +should be treated cautiously, because the abundance estimate will be very granular +and might vary strongly between (theoretical) replicates due to high impact of stochasticity. + +Following, the numbers of the table above are shown in stacked barcharts as percentage of DADA2 input reads. +")) # Stacked barchart to num of reads @@ -553,7 +561,7 @@ n_arc <- sum(grepl("arc", barrnap_sum$result)) n_mito <- sum(grepl("mito", barrnap_sum$result)) n_euk <- sum(grepl("euk", barrnap_sum$result)) -barrnap_df_sum <- data.frame(label=c('Bacteria','Archea','Mitochondria','Eukaryotes','Unclassified'), +barrnap_df_sum <- data.frame(label=c('Bacteria','Archaea','Mitochondria','Eukaryotes','Unclassified'), count=c(n_bac,n_arc,n_mito,n_euk,n_asv - n_classified), percent=c(round( (n_bac/n_asv)*100, 2), round( (n_arc/n_asv)*100, 2), round( (n_mito/n_asv)*100, 2), round( (n_euk/n_asv)*100, 2), round( ( (n_asv - n_classified) /n_asv)*100, 2) ) ) @@ -585,8 +593,8 @@ cat("\n\nrRNA classification results can be found in folder [barrnap](../barrnap ```{r, eval = !isFALSE(params$path_barrnap_sum) && !isFALSE(params$filter_ssu), results='asis'} -cat("\n\nASVs were filtered for (",params$filter_ssu,") using the above classification.", - "The following table shows read counts for each sample before and after filtering:\n\n", sep = "") +cat("\n\nASVs were filtered for `",params$filter_ssu,"` (`bac`: bacteria, `arc`: archaea, `mito`: metazoan mitochondria, `euk`: eukaryotes) + using the above classification. 
The following table shows read counts for each sample before and after filtering:\n\n", sep = "") # Read the barrnap stats file filter_ssu_stats = read.table( params$filter_ssu_stats, header = TRUE, sep = "\t", stringsAsFactors = FALSE) @@ -618,7 +626,7 @@ cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-ro cat("## Sequence length\n") cat("A length filter was used to reduce potential contamination after ASV computation.", - "Before filtering, ASVs had the following length profile (count of 1 was transformed to 1.5 to plot on log10 scale):\n\n") + "Before filtering, ASVs had the following length profile (count of 1 was transformed to 1.5 to allow plotting on log10 scale):\n\n") # ASV length profile @@ -1091,7 +1099,8 @@ cat("# Downstream analysis with QIIME2\n", ```{r, eval = !isFALSE(params$filter_stats_tsv), results='asis'} cat("## ASV filtering\n", - "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA, for 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. ") + "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA. + For 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. 
") if ( params$exclude_taxa != "none" ) { cat("ASVs were removed when the taxonomic string contained any of `", params$exclude_taxa, "` (comma separated)", sep="") } From 94c3c1b074fd9c81a3010deb17f1cc83fd91dd76 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 2 Aug 2023 17:00:07 +0200 Subject: [PATCH 093/230] add more details for taxonomic references --- assets/report_template.Rmd | 43 +++++++++++++++++++++++++++------ modules/local/summary_report.nf | 6 ++--- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 7929505c..d49fdd29 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -41,9 +41,15 @@ params: min_len_asv: "" max_len_asv: "" cut_its: FALSE - dada2_ref_tax_title: "" - qiime2_ref_tax_title: "" - sintax_ref_tax_title: "" + dada2_ref_tax_title: FALSE + qiime2_ref_tax_title: FALSE + sintax_ref_tax_title: FALSE + dada2_ref_tax_file: "" + qiime2_ref_tax_file: "" + sintax_ref_tax_file: "" + dada2_ref_tax_citation: "" + qiime2_ref_tax_citation: "" + sintax_ref_tax_citation: "" exclude_taxa: "" min_frequency: "" min_samples: "" @@ -799,8 +805,8 @@ cat("## DADA2\n") # indicate reference taxonomy if (!params$flag_ref_tax_user) { - cat("The taxonomic classification was performed by DADA2 using the database: ", - "\"", params$dada2_ref_tax_title, "\".\n\n", sep = "") + cat("The taxonomic classification was performed by DADA2 using the database: `", params$dada2_ref_tax_title, "`. 
+ More details can be found in the ['Methods section'](#methods).\n\n", sep = "") } else { cat("The taxonomic classification was performed by DADA2 using a custom database ", "provided by the user.\n\n", sep = "") @@ -893,7 +899,8 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -cat("The taxonomic classification was performed by QIIME2 using the database: \"", params$qiime2_ref_tax_title, "\".\n\n", sep = "") +cat("The taxonomic classification was performed by QIIME2 using the database: `", params$qiime2_ref_tax_title, "`. + More details can be found in the ['Methods section'](#methods).\n\n", sep = "") # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") @@ -961,7 +968,8 @@ cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](.. # Header cat("## SINTAX\n") -cat("The taxonomic classification was performed by SINTAX using the database: \"", params$sintax_ref_tax_title, "\".\n\n", sep = "") +cat("The taxonomic classification was performed by SINTAX using the database: `", params$sintax_ref_tax_title, "`. 
+ More details can be found in the ['Methods section'](#methods).\n\n", sep = "") asv_tax <- read.table(params$sintax_taxonomy, header = TRUE, sep = "\t") @@ -1270,6 +1278,27 @@ cat("## PICRUSt2\n", # Methods ```{r, results='asis'} +if ( !isFALSE(params$dada2_ref_tax_title) ) { + cat("Taxonomic classification by DADA2:\n\n", + "- database: `", params$dada2_ref_tax_title, "`\n\n", + "- files: `", params$dada2_ref_tax_file, "`\n\n", + "- citation: `", params$dada2_ref_tax_citation, "`\n\n", sep = "") +} + +if ( !isFALSE(params$qiime2_ref_tax_title) ) { + cat("Taxonomic classification by QIIME2:\n\n", + "- database: `", params$qiime2_ref_tax_title, "`\n\n", + "- files: `", params$qiime2_ref_tax_file, "`\n\n", + "- citation: `", params$qiime2_ref_tax_citation, "`\n\n", sep = "") +} + +if ( !isFALSE(params$sintax_ref_tax_title) ) { + cat("Taxonomic classification by SINTAX:\n\n", + "- database: `", params$sintax_ref_tax_title, "`\n\n", + "- files: `", params$sintax_ref_tax_file, "`\n\n", + "- citation: `", params$sintax_ref_tax_citation, "`\n\n", sep = "") +} + if ( !isFALSE(params$mqc_plot) ) { # with MultiQC cat("MultiQC summarized computational methods in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html). diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 86a058c4..0295d73a 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -102,11 +102,11 @@ process SUMMARY_REPORT { itsx_cutasv_summary ? "itsx_cutasv_summary='$itsx_cutasv_summary',cut_its='$params.cut_its'" : "", !dada2_tax ? "" : params.dada_ref_tax_custom ? 
"dada2_taxonomy='$dada2_tax',flag_ref_tax_user=TRUE" : - "dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}'", + "dada2_taxonomy='$dada2_tax',dada2_ref_tax_title='${params.dada_ref_databases[params.dada_ref_taxonomy]["title"]}',dada2_ref_tax_file='${params.dada_ref_databases[params.dada_ref_taxonomy]["file"]}',dada2_ref_tax_citation='${params.dada_ref_databases[params.dada_ref_taxonomy]["citation"]}'", cut_dada_ref_taxonomy ? "cut_dada_ref_taxonomy='$cut_dada_ref_taxonomy'" : "", - sintax_tax ? "sintax_taxonomy='$sintax_tax',sintax_ref_tax_title='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}'" : "", + sintax_tax ? "sintax_taxonomy='$sintax_tax',sintax_ref_tax_title='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["title"]}',sintax_ref_tax_file='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]}',sintax_ref_tax_citation='${params.sintax_ref_databases[params.sintax_ref_taxonomy]["citation"]}'" : "", pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}'" : "", + qiime2_tax ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? 
"barplot=TRUE" : "", From 81af72742edd821333e6f24c28c4bd59d4429158 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 2 Aug 2023 17:16:24 +0200 Subject: [PATCH 094/230] add citations and links for tools --- assets/report_template.Rmd | 40 +++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index d49fdd29..648a44f2 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -204,11 +204,11 @@ cat("# Preprocessing\n") ```{r, eval = !isFALSE(params$mqc_plot), results='asis'} cat("## FastQC\n") -cat("FastQC gives general quality metrics about your sequenced reads. ", +cat("[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. ", "It provides information about the quality score distribution across your reads, ", "per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences.\n") cat("The sequence quality was checked using FastQC and resulting data was ", - "aggregated using the FastQC module of MultiQC. For more quality ", + "aggregated using the FastQC module of [MultiQC](https://multiqc.info/). For more quality ", "controls and per sample quality checks you can check the full ", "MultiQC report, which can be found in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html).", sep = "") ``` @@ -221,7 +221,8 @@ knitr::include_graphics(params$mqc_plot) ```{r, eval = !isFALSE(params$ca_sum_path), results='asis'} cat("## Primer removal with Cutadapt\n") -cat("Cutadapt is trimming primer sequences from sequencing reads. ", +cat("[Cutadapt](https://journal.embnet.org/index.php/embnetjournal/article/view/200)", + "is trimming primer sequences from sequencing reads. ", "Primer sequences are non-biological sequences that often introduce ", "point mutations that do not reflect sample sequences. This is especially ", "true for degenerated PCR primer. 
If primer trimming were to be omitted, artifactual ", @@ -343,7 +344,7 @@ cat("Overall read quality profiles as heat map of the frequency of each quality ```{r, eval = !isFALSE(params$dada_err_path) || !isFALSE(params$dada_stats_path) || !isFALSE(params$asv_table_path), results='asis'} cat("# ASV inference using DADA2\n\n", - "DADA2 performs fast and accurate sample inference from amplicon data with single-nucleotide resolution. + "[DADA2](https://doi.org/10.1038/nmeth.3869) performs fast and accurate sample inference from amplicon data with single-nucleotide resolution. It infers exact amplicon sequence variants (ASVs) from amplicon data with fewer false positives than many other methods while maintaining high sensitivity.\n\n", "DADA2 reduces sequence errors and dereplicates sequences by quality filtering, denoising, @@ -545,7 +546,7 @@ cat("# Filtering of ASVs\n") ```{r, eval = !isFALSE(params$path_barrnap_sum), results='asis'} cat("## rRNA detection\n") -cat("Barrnap classifies the ASVs into the origin domain (including mitochondiral origin).\n\n", sep = "") +cat("[Barrnap](https://github.com/tseemann/barrnap) classifies the ASVs into the origin domain (including mitochondiral origin).\n\n", sep = "") # Read the barrnap files and count the lines barrnap_sum = read.table( params$path_barrnap_sum, header = TRUE, sep = "\t", stringsAsFactors = FALSE) @@ -748,7 +749,7 @@ cat("# Taxonomic Classification\n") ```{r, eval = !isFALSE(params$cut_its), results='asis'} cat("## ITS region\n") -cat("The ITS region was extracted from each ASV sequence using ITSx.", +cat("The ITS region was extracted from each ASV sequence using [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073).", "Taxonomic classification should have improved performance based on extracted ITS sequence.\n") cat("The extracted ITS region is",params$cut_its,"sequence. 
") @@ -805,7 +806,8 @@ cat("## DADA2\n") # indicate reference taxonomy if (!params$flag_ref_tax_user) { - cat("The taxonomic classification was performed by DADA2 using the database: `", params$dada2_ref_tax_title, "`. + cat("The taxonomic classification was performed by [DADA2](https://pubmed.ncbi.nlm.nih.gov/27214047/) + using the database: `", params$dada2_ref_tax_title, "`. More details can be found in the ['Methods section'](#methods).\n\n", sep = "") } else { cat("The taxonomic classification was performed by DADA2 using a custom database ", @@ -899,7 +901,8 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -cat("The taxonomic classification was performed by QIIME2 using the database: `", params$qiime2_ref_tax_title, "`. +cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) + using the database: `", params$qiime2_ref_tax_title, "`. More details can be found in the ['Methods section'](#methods).\n\n", sep = "") # Read file and prepare table @@ -968,7 +971,8 @@ cat("\n\nQIIME2 taxonomy assignments can be found in folder [qiime2/taxonomy](.. # Header cat("## SINTAX\n") -cat("The taxonomic classification was performed by SINTAX using the database: `", params$sintax_ref_tax_title, "`. +cat("The taxonomic classification was performed by [SINTAX](https://doi.org/10.1101/074161) + using the database: `", params$sintax_ref_tax_title, "`. More details can be found in the ['Methods section'](#methods).\n\n", sep = "") asv_tax <- read.table(params$sintax_taxonomy, header = TRUE, sep = "\t") @@ -1032,8 +1036,10 @@ cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax). ```{r, eval = !isFALSE(params$pplace_taxonomy), results='asis'} # Header cat("## Phylogenetic Placement\n", - "Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations. 
The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons. It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. ", - "Extraction of taxonomic classification was performed with EPA-NG and GAPPA. ") + "Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations.", + "The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons.", + "It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. ", + "Extraction of taxonomic classification was performed with [EPA-NG](https://github.com/Pbdas/epa-ng) and [Gappa](https://pubmed.ncbi.nlm.nih.gov/32016344/). ") # Read file and prepare table asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") @@ -1099,7 +1105,7 @@ cat("\n\nPhylogenetic placement taxonomy assignments can be found in folder [ppl ```{r, eval = !isFALSE(params$val_used_taxonomy), results='asis'} # Header cat("# Downstream analysis with QIIME2\n", - "Files that were input to QIIME2 can be found in folder [qiime2/input/](../qiime2/input/).", + "Files that were input to [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) can be found in folder [qiime2/input/](../qiime2/input/).", "Results of taxonomic classification of",params$val_used_taxonomy,"was used in all following analysis, see in the above sections.") ``` @@ -1227,7 +1233,8 @@ for (folder in diversity_indices_beta) { ```{r, eval = !isFALSE(params$qiime_adonis_formula), results='asis'} cat("_ADONIS test for beta diversity_\n\n") -cat("Permutational multivariate analysis of variance using distance matrices (adonis) +cat("Permutational multivariate analysis of variance using distance matrices + [adonis](https://doi.org/10.1111/j.1442-9993.2001.01070.pp.x) (in 
[VEGAN](https://CRAN.R-project.org/package=vegan)) determines whether groups of samples are significantly different from one another. The formula was `",params$qiime_adonis_formula,"` (multiple formulas are comma separated). adonis computes an R2 value (effect size) which shows the percentage of variation explained @@ -1247,7 +1254,8 @@ for (folder in diversity_indices_adonis) { ```{r, eval = !isFALSE(params$ancom), results='asis'} cat("## ANCOM\n\n") -cat("Analysis of Composition of Microbiomes (ANCOM) is applied to identify features that are differentially +cat("[Analysis of Composition of Microbiomes (ANCOM)](https://www.ncbi.nlm.nih.gov/pubmed/26028277) + is applied to identify features that are differentially abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) will be differentially abundant between groups otherwise the method will be inaccurate. Comparisons between groups of samples is performed for specific metadata that can be found in folder @@ -1266,7 +1274,7 @@ for (folder in ancom) { ```{r, eval = !isFALSE(params$picrust_pathways), results='asis'} cat("## PICRUSt2\n", - "PICRUSt2 (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences.", + "[PICRUSt2](https://pubmed.ncbi.nlm.nih.gov/32483366/) (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences.", "Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology predictions were made for each sample.", "In folder [PICRUSt2/](../PICRUSt2/) are predicted quantifications for Enzyme Classification numbers (EC), see `EC_pred_metagenome_unstrat_descrip.tsv`, KEGG orthologs (KO), see `KO_pred_metagenome_unstrat_descrip.tsv`, MetaCyc ontology, see `METACYC_path_abun_unstrat_descrip.tsv`.", "Quantifications are not 
normalized yet, they can be normalized e.g. by the total sum per sample.", @@ -1301,7 +1309,7 @@ if ( !isFALSE(params$sintax_ref_tax_title) ) { if ( !isFALSE(params$mqc_plot) ) { # with MultiQC - cat("MultiQC summarized computational methods in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html). + cat("[MultiQC](https://multiqc.info/) summarized computational methods in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html). The proposed short methods description can be found in [MultiQC's Methods Description](../multiqc/multiqc_report.html#nf-core-ampliseq-methods-description), versions of software collected at runtime in [MultiQC's Software Versions](../multiqc/multiqc_report.html#software_versions), and a summary of non-default parameter in [MultiQC's Workflow Summary](../multiqc/multiqc_report.html#nf-core-ampliseq-summary).\n\n") From 8225269c959546de6296753d4f88d3e77177f8ca Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 3 Aug 2023 15:34:07 +0200 Subject: [PATCH 095/230] add report title and abstract --- assets/report_template.Rmd | 422 ++++++++++++++++++++------------ modules/local/summary_report.nf | 5 +- nextflow.config | 2 + nextflow_schema.json | 10 + workflows/ampliseq.nf | 2 + 5 files changed, 286 insertions(+), 155 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 648a44f2..8ab387b6 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -9,15 +9,15 @@ output: df_print: paged # tables are printed as an html table with support for pagination over rows and columns highlight: pygments pdf_document: true -date: "`r Sys.Date()`" #bibliography: ./references.bibtex params: # any parameter that is by default "FALSE" is used to evaluate the inclusion of a codeblock with e.g. 
"eval=!isFALSE(params$mqc_plot)" # report style css: NULL - logo: NULL - input_dir: "./" + report_logo: NULL + report_title: "Summary of analysis results" + report_abstract: FALSE # pipeline versions workflow_manifest_version: NULL @@ -124,7 +124,7 @@ htmltools::includeCSS(params$css) cat(paste0(" ")) @@ -138,36 +138,55 @@ if ( endsWith( params$workflow_manifest_version, "dev") ) { } else { ampliseq_version = paste0("version ",params$workflow_manifest_version) } -report_title <- "Summary of analysis results" +report_title <- params$report_title report_subtitle <- paste0('nf-core/ampliseq workflow ', ampliseq_version) ``` --- -title: "`r report_title`" +title: "`r report_title`" subtitle: `r report_subtitle` +date: '`r format(Sys.Date(), "%B %d, %Y")`' +--- + --- +```{r, results='asis'} +if ( !isFALSE(params$report_abstract) ) { + report_abstract <- paste(readLines(params$report_abstract), collapse="\n") + cat(report_abstract) +} else { + # with tab indentation, the following will be a code block! + cat(paste0(" # Abstract The bioinformatics analysis pipeline [nfcore/ampliseq](https://nf-co.re/ampliseq) is used for amplicon sequencing, supporting denoising of any amplicon and supports a variety of taxonomic databases for taxonomic assignment of 16S, ITS, CO1 and 18S amplicons. + ")) +} +``` ```{r, results='asis'} if ( !isFALSE(params$metadata) ) { - cat("# Input and metadata\n\n", - "Pipeline input was saved in folder [input](../input).\n\n") + cat(paste0(" +# Data input and Metadata + +Pipeline input was saved in folder [input](../input). + ")) } else { - cat("# Input\n\n", - "Pipeline input was saved in folder [input](../input).\n\n") + cat(paste0(" +# Data input + +Pipeline input was saved in folder [input](../input). 
+ ")) } if ( !isFALSE(params$samplesheet) ) { # samplesheet input - cat("Sequencing data was provided in the samplesheet file `", params$samplesheet, "` that is displayed below:", sep="") + cat("\nSequencing data was provided in the samplesheet file `", params$samplesheet, "` that is displayed below:", sep="") samplesheet <- read.table(file = params$samplesheet, header = TRUE, sep = "\t") # Display table @@ -177,13 +196,13 @@ if ( !isFALSE(params$samplesheet) ) { paging = FALSE)) } else if ( !isFALSE(params$fasta) ) { # fasta input - cat("ASV/OTU sequences were provided in the fasta file `", params$fasta, "`. ", sep="") + cat("\nASV/OTU sequences were provided in the fasta file `", params$fasta, "`. ", sep="") } else if ( !isFALSE(params$input) ) { # folder input - cat("Sequencing data was retrieved from folder `", params$fasta, "`. ", sep="") + cat("\nSequencing data was retrieved from folder `", params$fasta, "`. ", sep="") } if ( !isFALSE(params$metadata) ) { - cat("Metadata associated with the sequencing data was provided in `", params$metadata, "` and is displayed below:", sep="") + cat("\nMetadata associated with the sequencing data was provided in `", params$metadata, "` and is displayed below:", sep="") metadata <- read.table(file = params$metadata, header = TRUE, sep = "\t") # Display table @@ -203,14 +222,15 @@ cat("# Preprocessing\n") ```{r, eval = !isFALSE(params$mqc_plot), results='asis'} -cat("## FastQC\n") -cat("[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. ", - "It provides information about the quality score distribution across your reads, ", - "per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences.\n") -cat("The sequence quality was checked using FastQC and resulting data was ", - "aggregated using the FastQC module of [MultiQC](https://multiqc.info/). 
For more quality ", - "controls and per sample quality checks you can check the full ", - "MultiQC report, which can be found in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html).", sep = "") +cat(paste0(" +## FastQC + +[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. +It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), +adapter contamination and overrepresented sequences. The sequence quality was checked using FastQC and resulting data was +aggregated using the FastQC module of [MultiQC](https://multiqc.info/). For more quality controls and per sample quality checks you can check the full +MultiQC report, which can be found in [multiqc/multiqc_report.html](../multiqc/multiqc_report.html). +")) ``` ```{r, eval = !isFALSE(params$mqc_plot), out.width='100%', dpi=1200, fig.align='center'} @@ -220,14 +240,14 @@ knitr::include_graphics(params$mqc_plot) ```{r, eval = !isFALSE(params$ca_sum_path), results='asis'} -cat("## Primer removal with Cutadapt\n") -cat("[Cutadapt](https://journal.embnet.org/index.php/embnetjournal/article/view/200)", - "is trimming primer sequences from sequencing reads. ", - "Primer sequences are non-biological sequences that often introduce ", - "point mutations that do not reflect sample sequences. This is especially ", - "true for degenerated PCR primer. If primer trimming were to be omitted, artifactual ", - "amplicon sequence variants might be computed by the denoising tool or ", - "sequences might be lost due to being labelled as PCR chimera.\n\n") +cat(paste0(" +## Primer removal with Cutadapt + +[Cutadapt](https://journal.embnet.org/index.php/embnetjournal/article/view/200) is trimming primer sequences from sequencing reads. +Primer sequences are non-biological sequences that often introduce point mutations that do not reflect sample sequences. 
This is especially +true for degenerated PCR primer. If primer trimming were to be omitted, artifactual amplicon sequence variants might be computed by +the denoising tool or sequences might be lost due to being labelled as PCR chimera. +")) # import tsv cutadapt_summary <- read.table(file = params$ca_sum_path, header = TRUE, sep = "\t") @@ -264,9 +284,12 @@ cat("\n\nCutadapt results can be found in folder [cutadapt](../cutadapt).") ```{r, eval = !isFALSE(params$dada_filtntrim_args), results='asis'} -cat("## Quality filtering using DADA2\n\n") -cat("Additional quality filtering can improve sequence recovery. ", - "Often it is advised trimming the last few nucleotides to avoid less well-controlled errors that can arise there. ") +cat(paste0(" +## Quality filtering using DADA2 + +Additional quality filtering can improve sequence recovery. +Often it is advised trimming the last few nucleotides to avoid less well-controlled errors that can arise there. +")) if (params$trunc_qmin) { f_and_tr_args <- readLines(params$dada_filtntrim_args) @@ -334,28 +357,37 @@ if (params$flag_single_end) { ``` ```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'} -cat("Overall read quality profiles as heat map of the frequency of each quality score at each base position. ", - "The mean quality score at each position is shown by the green line, and the quartiles of the quality score ", - "distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least ", - "that position. Original plots can be found [folder dada2/QC/](../dada2/QC/) with names that end in `_qual_stats.pdf`.") +cat(paste0(" +Overall read quality profiles as heat map of the frequency of each quality score at each base position. +The mean quality score at each position is shown by the green line, and the quartiles of the quality score +distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least +that position. 
Original plots can be found [folder dada2/QC/](../dada2/QC/) with names that end in `_qual_stats.pdf`. +")) ``` ```{r, eval = !isFALSE(params$dada_err_path) || !isFALSE(params$dada_stats_path) || !isFALSE(params$asv_table_path), results='asis'} -cat("# ASV inference using DADA2\n\n", - "[DADA2](https://doi.org/10.1038/nmeth.3869) performs fast and accurate sample inference from amplicon data with single-nucleotide resolution. - It infers exact amplicon sequence variants (ASVs) from amplicon data with fewer false positives than - many other methods while maintaining high sensitivity.\n\n", - "DADA2 reduces sequence errors and dereplicates sequences by quality filtering, denoising, - read pair merging (for paired end Illumina reads only) and PCR chimera removal.") +cat(paste0(" +# ASV inference using DADA2 + +[DADA2](https://doi.org/10.1038/nmeth.3869) performs fast and accurate sample inference from amplicon data with single-nucleotide +resolution. It infers exact amplicon sequence variants (ASVs) from amplicon data with fewer false positives than many other +methods while maintaining high sensitivity. + +DADA2 reduces sequence errors and dereplicates sequences by quality filtering, denoising, +read pair merging (for paired end Illumina reads only) and PCR chimera removal. +")) ``` ```{r, eval = !isFALSE(params$dada_err_path), results='asis'} -cat("## Error correction\n\n", - "Read error correction was performed using estimated error rates, visualized below.\n") +cat(paste0(" +## Error correction + +Read error correction was performed using estimated error rates, visualized below. +")) # check if single run or multirun flag_multirun = length ( unlist( strsplit( params$dada_err_run,"," ) ) ) != 1 @@ -381,26 +413,31 @@ knitr::include_graphics(dada_err_path) ``` ```{r, eval = !isFALSE(params$dada_err_path), results='asis'} -cat("Estimated error rates for each possible transition. 
The black line shows the estimated error rates after - convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal - definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates - (points), and the error rates should drop with increased quality. Original plots can be found in - [folder dada2/QC/](../dada2/QC/) with names that end in `.err.pdf`.") +cat(paste0(" +Estimated error rates for each possible transition. The black line shows the estimated error rates after +convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal +definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates +(points), and the error rates should drop with increased quality. Original plots can be found in +[folder dada2/QC/](../dada2/QC/) with names that end in `.err.pdf`. +")) ``` ```{r, eval = !isFALSE(params$dada_stats_path), results='asis'} -cat("## Read counts per sample\n\n", - "Tracking read numbers through DADA2 processing steps, for each sample. In the following table are read numbers after each processing stage.\n") +cat(paste0(" +## Read counts per sample + +Tracking read numbers through DADA2 processing steps, for each sample. In the following table are read numbers after each processing stage. 
+")) if ( params$flag_single_end ) { cat("Processing stages are: input - reads into DADA2, filtered - reads passed quality filtering, ", - "denoised - reads after denoising, nonchim - reads in non-chimeric sequences (final ASVs)") + "denoised - reads after denoising, nonchim - reads in non-chimeric sequences (final ASVs).") } else { cat("Processing stages are: input - read pairs into DADA2, filtered - read pairs passed quality filtering, ", "denoisedF - forward reads after denoising, denoisedR - reverse reads after denoising, ", - "merged - successfully merged read pairs, nonchim - read pairs in non-chimeric sequences (final ASVs)") + "merged - successfully merged read pairs, nonchim - read pairs in non-chimeric sequences (final ASVs).") } # import stats tsv @@ -412,7 +449,7 @@ datatable(dada_stats, options = list( scrollY = "300px", paging = FALSE)) -cat(paste0("*** +cat(paste0(" Samples with unusual low reads numbers relative to the number of expected ASVs should be treated cautiously, because the abundance estimate will be very granular and might vary strongly between (theoretical) replicates due to high impact of stochasticity. @@ -501,10 +538,14 @@ svg("stacked_barchart_of_reads.svg") plot_dada_stats_p_t invisible(dev.off()) -cat("\n\nBetween",min(dada_stats_p$analysis),"% and",max(dada_stats_p$analysis),"% reads per sample were retained for analysis within DADA2 steps.\n\n", - "The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. - Samples that are very different in lost reads (per stage) to the majority of samples must be compared with caution, because an unusual problem - (e.g. during nucleotide extraction, library preparation, or sequencing) could have occurred that might add bias to the analysis.") +cat(paste0(" + +Between ",min(dada_stats_p$analysis),"% and ",max(dada_stats_p$analysis),"% reads per sample were retained for analysis within DADA2 steps. 
+ +The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. +Samples that are very different in lost reads (per stage) to the majority of samples must be compared with caution, because an unusual problem +(e.g. during nucleotide extraction, library preparation, or sequencing) could have occurred that might add bias to the analysis. +")) ``` @@ -630,10 +671,13 @@ cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-ro ```{r, eval = !isFALSE(params$filter_len_asv_len_orig), results='asis'} -cat("## Sequence length\n") +cat(paste0(" +## Sequence length -cat("A length filter was used to reduce potential contamination after ASV computation.", - "Before filtering, ASVs had the following length profile (count of 1 was transformed to 1.5 to allow plotting on log10 scale):\n\n") +A length filter was used to reduce potential contamination after ASV computation. +Before filtering, ASVs had the following length profile (count of 1 was transformed to 1.5 to allow plotting on log10 scale): + +")) # ASV length profile @@ -711,11 +755,14 @@ cat("\n\nLength filter results can be found in folder [asv_length_filter](../asv ```{r, eval = !isFALSE(params$filter_codons), results='asis'} -cat("## Codon usage\n") +cat(paste0(" +## Codon usage + +Amplicons of coding regions are expected to be free of stop codons and consist of condon tripletts. +ASVs were filtered against the presence of stop codons (",params$stop_codons,") in the specified open reading frame of the ASV. +Additionally, ASVs that are not multiple of 3 in length were omitted. 
-cat("Amplicons of coding regions are expected to be free of stop codons and consist of condon tripletts.", - "ASVs were filtered against the presence of stop codons (",params$stop_codons,") in the specified open reading frame of the ASV.", - "Additionally, ASVs that are not multiple of 3 in length were omitted.\n\n") +")) # import stats tsv filter_codons_stats <- read.table(file = params$filter_codons, header = TRUE, sep = "\t") @@ -748,10 +795,14 @@ cat("# Taxonomic Classification\n") ```{r, eval = !isFALSE(params$cut_its), results='asis'} -cat("## ITS region\n") -cat("The ITS region was extracted from each ASV sequence using [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073).", - "Taxonomic classification should have improved performance based on extracted ITS sequence.\n") -cat("The extracted ITS region is",params$cut_its,"sequence. ") +cat(paste0(" +## ITS regions + +The ITS region was extracted from each ASV sequence using [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073). +Taxonomic classification should have improved performance based on extracted ITS sequence. + +The extracted ITS region is",params$cut_its,"sequence. +")) # Read ITSX summary itsx_summary <- readLines(params$itsx_cutasv_summary) @@ -1034,12 +1085,14 @@ cat("\n\nSINTAX taxonomy assignments can be found in folder [sintax](../sintax). ```{r, eval = !isFALSE(params$pplace_taxonomy), results='asis'} -# Header -cat("## Phylogenetic Placement\n", - "Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations.", - "The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons.", - "It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. 
", - "Extraction of taxonomic classification was performed with [EPA-NG](https://github.com/Pbdas/epa-ng) and [Gappa](https://pubmed.ncbi.nlm.nih.gov/32016344/). ") +cat(paste0(" +## Phylogenetic Placement + +Phylogenetic placement grafts sequences onto a phylogenetic reference tree and optionally outputs taxonomic annotations. +The reference tree is ideally made from full-length high-quality sequences containing better evolutionary signal than short amplicons. +It is hence superior to estimating de-novo phylogenetic trees from short amplicon sequences. +Extraction of taxonomic classification was performed with [EPA-NG](https://github.com/Pbdas/epa-ng) and [Gappa](https://pubmed.ncbi.nlm.nih.gov/32016344/). +")) # Read file and prepare table asv_tax <- read.table(params$pplace_taxonomy, header = TRUE, sep = "\t") @@ -1112,9 +1165,13 @@ cat("# Downstream analysis with QIIME2\n", ```{r, eval = !isFALSE(params$filter_stats_tsv), results='asis'} -cat("## ASV filtering\n", - "Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA. - For 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. ") +cat(paste0(" +## ASV filtering + +Unwanted taxa are often off-targets generated in PCR with primers that are not perfectly specific for the target DNA. +For 16S rRNA sequencing mitrochondria and chloroplast sequences are typically removed because these are frequent unwanted non-bacteria PCR products. 
+")) + if ( params$exclude_taxa != "none" ) { cat("ASVs were removed when the taxonomic string contained any of `", params$exclude_taxa, "` (comma separated)", sep="") } @@ -1154,28 +1211,46 @@ cat("\n\nTables with read count numbers and filtered abundance tables are in fol ```{r, eval = !isFALSE(params$abundance_tables), results='asis'} -cat("## Abundance tables\n", - "The abundance tables are the final data for further downstream analysis and visualisations. The tables are based on the computed ASVs and taxonomic classification, but after removal of unwanted taxa. ", - "Folder [qiime2/abundance_tables](../qiime2/abundance_tables) contains tap-separated files (.tsv) that can be opened by any spreadsheet software.") - -cat("\n\n## Relative abundance tables\n", - "Absolute abundance tables produced by the previous steps contain count data, but the compositional nature of 16S rRNA amplicon sequencing requires sequencing depth normalisation. This step computes relative abundance tables using TSS (Total Sum Scaling normalisation) for various taxonomic levels and detailed tables for all ASVs with taxonomic classification, sequence and relative abundance for each sample. Typically used for in depth investigation of taxa abundances. ", - "Folder [qiime2/rel_abundance_tables](../qiime2/rel_abundance_tables) contains tap-separated files (.tsv) that can be opened by any spreadsheet software.") +cat(paste0(" +## Abundance tables + +The abundance tables are the final data for further downstream analysis and visualisations. +The tables are based on the computed ASVs and taxonomic classification, but after removal of unwanted taxa. +Folder [qiime2/abundance_tables](../qiime2/abundance_tables) contains tap-separated files (.tsv) +that can be opened by any spreadsheet software. 
+ +## Relative abundance tables + +Absolute abundance tables produced by the previous steps contain count data, but the compositional +nature of 16S rRNA amplicon sequencing requires sequencing depth normalisation. This step computes +relative abundance tables using TSS (Total Sum Scaling normalisation) for various taxonomic levels +and detailed tables for all ASVs with taxonomic classification, sequence and relative abundance for +each sample. Typically used for in depth investigation of taxa abundances. +Folder [qiime2/rel_abundance_tables](../qiime2/rel_abundance_tables) contains tap-separated files (.tsv) +that can be opened by any spreadsheet software. +")) ``` ```{r, eval = !isFALSE(params$barplot), results='asis'} -cat("## Barplot\n", - "Interactive abundance plot that aids exploratory browsing the discovered taxa and their abundance", - "in samples and allows sorting for associated meta data.", - "Folder [qiime2/barplot](../qiime2/barplot) contains barplots, click [qiime2/barplot/index.html](../qiime2/barplot/index.html) to open it in your web browser.") +cat(paste0(" +## Barplot + +Interactive abundance plot that aids exploratory browsing the discovered taxa and their abundance +in samples and allows sorting for associated meta data. Folder [qiime2/barplot](../qiime2/barplot) +contains barplots, click [qiime2/barplot/index.html](../qiime2/barplot/index.html) to open it in +your web browser. 
+")) ``` ```{r, eval = !isFALSE(params$metadata_category_barplot), results='asis'} -cat("\n\nAdditionally, barplots with average relative abundance values were produced for", - params$metadata_category_barplot,"(comma separated if several) in [qiime2/barplot_average](../qiime2/barplot_average) - in separate folders following the scheme `barplot_{treatment}`:\n") +cat(paste0(" +Additionally, barplots with average relative abundance values were produced +for `",params$metadata_category_barplot,"` (comma separated if several) in [qiime2/barplot_average](../qiime2/barplot_average) +in separate folders following the scheme `barplot_{treatment}`: +")) + metadata_category_barplot <- sort( unlist( strsplit( params$metadata_category_barplot,"," ) ) ) for (category in metadata_category_barplot) { barplot_folder_path <- paste0("qiime2/barplot_average/barplot_",category) @@ -1186,13 +1261,17 @@ for (category in metadata_category_barplot) { ```{r, eval = !isFALSE(params$alpha_rarefaction), results='asis'} -cat("## Alpha diversity rarefaction curves\n", - "Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the richness of the samples has been fully observed or sequenced. If the slope of the curves does not level out and the lines do not become horizontal, this might be because the sequencing depth was too low to observe all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. ") -# warning if dada_sample_inference is independent, because alpha diversities are not expected to be accurate! -if ( params$dada_sample_inference == "independent") { - cat("Please note that ASVs were inferred for each sample independently, that can make alpha diversity indices a poor estimate of true diversity. 
") -} -cat("Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click [qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser.") +cat(paste0(" +## Alpha diversity rarefaction curves + +Produces rarefaction plots for several alpha diversity indices, and is primarily used to determine if the +richness of the samples has been fully observed or sequenced. If the slope of the curves does not level +out and the lines do not become horizontal, this might be because the sequencing depth was too low to observe +all diversity or that sequencing error artificially increases sequence diversity and causes false discoveries. + +Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the data, click +[qiime2/alpha-rarefaction/index.html](../qiime2/alpha-rarefaction/index.html) to open it in your web browser. +")) ``` @@ -1200,30 +1279,52 @@ cat("Folder [qiime2/alpha-rarefaction](../qiime2/alpha-rarefaction) contains the ```{r, eval = !isFALSE(params$diversity_indices_beta), results='asis'} diversity_indices_depth <- readLines(params$diversity_indices_depth) -cat("## Diversity analysis\n", - "Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity). Diversity calculations are based on sub-sampled data rarefied to",diversity_indices_depth, "counts. ", - "\n### Alpha diversity indices\n", - "Alpha diversity measures the species diversity within samples. ", - sep = "\n") +cat(paste0(" +## Diversity analysis + +Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity). +Diversity calculations are based on sub-sampled data rarefied to ",diversity_indices_depth, " counts. + +### Alpha diversity indices + +Alpha diversity measures the species diversity within samples. 
+")) + if ( params$dada_sample_inference == "independent") { cat("Please note that ASVs were inferred for each sample independently, that can make alpha diversity indices a poor estimate of true diversity. ") } -cat("This step calculates alpha diversity using various methods and performs pairwise comparisons of groups of samples. It is based on a phylogenetic tree of all ASV sequences. ", - "Folder [qiime2/diversity/alpha_diversity](../qiime2/diversity/alpha_diversity) contains the alpha-diversity data:\n", - "- Shannon’s diversity index (quantitative): [qiime2/diversity/alpha_diversity/shannon_vector/index.html](../qiime2/diversity/alpha_diversity/shannon_vector/index.html)\n", - "- Pielou’s Evenness: [qiime2/diversity/alpha_diversity/evenness_vector/index.html](../qiime2/diversity/alpha_diversity/evenness_vector/index.html)\n", - "- Faith’s Phylogenetic Diversity (qualitiative, phylogenetic) [qiime2/diversity/alpha_diversity/faith_pd_vector/index.html](../qiime2/diversity/alpha_diversity/faith_pd_vector/index.html)\n", - "- Observed OTUs (qualitative): [qiime2/diversity/alpha_diversity/observed_otus_vector/index.html](../qiime2/diversity/alpha_diversity/observed_otus_vector/index.html)\n", - "\n### Beta diversity indices\n", - "Beta diversity measures the species community differences between samples. This step calculates beta diversity distances using various methods and performs pairwise comparisons of groups of samples. Additionally, principle coordinates analysis (PCoA) plots are produced that can be visualized with Emperor in your default browser without the need for installation. These calculations are based on a phylogenetic tree of all ASV sequences. 
", - "Folder [qiime2/diversity/beta_diversity](../qiime2/diversity/beta_diversity) contains the beta-diverity data:\n", - "1 PCoA for four different beta diversity distances are accessible via:\n", - "- Bray-Curtis distance (quantitative): [qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html)\n", - "- Jaccard distance (qualitative): [qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html)\n", - "- unweighted UniFrac distance (qualitative, phylogenetic) [qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html)\n", - "- weighted UniFrac distance (quantitative, phylogenetic): [qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html)\n", - "2 Pairwise comparisons between groups of samples is performed for specific metadata that can be found in folder [qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). Each significance test result is in its separate folder following the scheme `{method}_distance_matrix-{treatment}`:", - sep = "\n") + +cat(paste0(" +This step calculates alpha diversity using various methods and performs pairwise comparisons of groups of samples. It is based on a phylogenetic tree of all ASV sequences. 
+Folder [qiime2/diversity/alpha_diversity](../qiime2/diversity/alpha_diversity) contains the alpha-diversity data: + +- Shannon’s diversity index (quantitative): [qiime2/diversity/alpha_diversity/shannon_vector/index.html](../qiime2/diversity/alpha_diversity/shannon_vector/index.html) +- Pielou’s Evenness: [qiime2/diversity/alpha_diversity/evenness_vector/index.html](../qiime2/diversity/alpha_diversity/evenness_vector/index.html) +- Faith’s Phylogenetic Diversity (qualitative, phylogenetic) [qiime2/diversity/alpha_diversity/faith_pd_vector/index.html](../qiime2/diversity/alpha_diversity/faith_pd_vector/index.html) +- Observed OTUs (qualitative): [qiime2/diversity/alpha_diversity/observed_otus_vector/index.html](../qiime2/diversity/alpha_diversity/observed_otus_vector/index.html) + +### Beta diversity indices + +Beta diversity measures the species community differences between samples. This step calculates beta diversity distances using +various methods and performs pairwise comparisons of groups of samples. Additionally, principal coordinates analysis (PCoA) +plots are produced that can be visualized with Emperor in your default browser without the need for installation. +These calculations are based on a phylogenetic tree of all ASV sequences. 
+Folder [qiime2/diversity/beta_diversity](../qiime2/diversity/beta_diversity) contains the beta-diversity data: + +#### PCoA for four different beta diversity distances are accessible via: + +- Bray-Curtis distance (quantitative): [qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html) +- Jaccard distance (qualitative): [qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/jaccard_pcoa_results-PCoA/index.html) +- unweighted UniFrac distance (qualitative, phylogenetic) [qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/unweighted_unifrac_pcoa_results-PCoA/index.html) +- weighted UniFrac distance (quantitative, phylogenetic): [qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html](../qiime2/diversity/beta_diversity/weighted_unifrac_pcoa_results-PCoA/index.html) + +#### Pairwise comparisons between groups of samples + +Statistics on differences between specific metadata groups that can be found in folder +[qiime2/diversity/beta_diversity/](../qiime2/diversity/beta_diversity/). 
Each significance test +result is in its separate folder following the scheme `{method}_distance_matrix-{treatment}`: +")) + diversity_indices_beta <- sort( unlist( strsplit( params$diversity_indices_beta,"," ) ) ) for (folder in diversity_indices_beta) { beta_folder_path <- paste0("qiime2/diversity/",folder) #"beta_diversity/" is defined in input section with "stageAs: 'beta_diversity/*'" @@ -1232,17 +1333,21 @@ for (folder in diversity_indices_beta) { ``` ```{r, eval = !isFALSE(params$qiime_adonis_formula), results='asis'} -cat("_ADONIS test for beta diversity_\n\n") -cat("Permutational multivariate analysis of variance using distance matrices - [adonis](https://doi.org/10.1111/j.1442-9993.2001.01070.pp.x) (in [VEGAN](https://CRAN.R-project.org/package=vegan)) - determines whether groups of samples are significantly different from one another. - The formula was `",params$qiime_adonis_formula,"` (multiple formulas are comma separated). - adonis computes an R2 value (effect size) which shows the percentage of variation explained - by a condition, as well as a p-value to determine the statistical significance. - The sequence of conditions in the formula matters, the variance of factors is removed - (statistically controlled for) from beginning to end of the formula. " ) - -cat("\n\nTest results are in separate folders following the scheme `{method}_distance_matrix-{adonis formula}`:\n") +cat(paste0(" +#### ADONIS test + +Permutational multivariate analysis of variance using distance matrices +[adonis](https://doi.org/10.1111/j.1442-9993.2001.01070.pp.x) (in [VEGAN](https://CRAN.R-project.org/package=vegan)) +determines whether groups of samples are significantly different from one another. +The formula was `",params$qiime_adonis_formula,"` (multiple formulas are comma separated). +adonis computes an R2 value (effect size) which shows the percentage of variation explained +by a condition, as well as a p-value to determine the statistical significance. 
+The sequence of conditions in the formula matters, the variance of factors is removed +(statistically controlled for) from beginning to end of the formula. + +Test results are in separate folders following the scheme `{method}_distance_matrix-{adonis formula}`: +")) + diversity_indices_adonis <- sort( unlist( strsplit( params$diversity_indices_adonis,"," ) ) ) for (folder in diversity_indices_adonis) { adonis_index_path <- paste0("qiime2/diversity/",folder) #"beta_diversity/" is defined in input section with "stageAs: 'beta_diversity/adonis/*'" @@ -1253,16 +1358,19 @@ for (folder in diversity_indices_adonis) { ```{r, eval = !isFALSE(params$ancom), results='asis'} -cat("## ANCOM\n\n") -cat("[Analysis of Composition of Microbiomes (ANCOM)](https://www.ncbi.nlm.nih.gov/pubmed/26028277) - is applied to identify features that are differentially - abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) - will be differentially abundant between groups otherwise the method will be inaccurate. - Comparisons between groups of samples is performed for specific metadata that can be found in folder - [qiime2/ancom/](../qiime2/ancom/). ", - sep = "\n") - -cat("\n\nTest results are in separate folders following the scheme `Category-{treatment}-{taxonomic level}`:\n") +cat(paste0(" +## ANCOM + +[Analysis of Composition of Microbiomes (ANCOM)](https://www.ncbi.nlm.nih.gov/pubmed/26028277) +is applied to identify features that are differentially +abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) +will be differentially abundant between groups otherwise the method will be inaccurate. +Comparisons between groups of samples is performed for specific metadata that can be found in folder +[qiime2/ancom/](../qiime2/ancom/). 
+ +Test results are in separate folders following the scheme `Category-{treatment}-{taxonomic level}`: +")) + ancom <- sort( unlist( strsplit( params$ancom,"," ) ) ) for (folder in ancom) { ancom_path <- paste0("qiime2/ancom/",folder) @@ -1273,12 +1381,16 @@ for (folder in ancom) { ```{r, eval = !isFALSE(params$picrust_pathways), results='asis'} -cat("## PICRUSt2\n", - "[PICRUSt2](https://pubmed.ncbi.nlm.nih.gov/32483366/) (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences.", - "Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology predictions were made for each sample.", - "In folder [PICRUSt2/](../PICRUSt2/) are predicted quantifications for Enzyme Classification numbers (EC), see `EC_pred_metagenome_unstrat_descrip.tsv`, KEGG orthologs (KO), see `KO_pred_metagenome_unstrat_descrip.tsv`, MetaCyc ontology, see `METACYC_path_abun_unstrat_descrip.tsv`.", - "Quantifications are not normalized yet, they can be normalized e.g. by the total sum per sample.", - sep = "\n") +cat(paste0(" +## PICRUSt2 + +[PICRUSt2](https://pubmed.ncbi.nlm.nih.gov/32483366/) (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) +is a software for predicting functional abundances based only on marker gene sequences. +Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology predictions were made for each sample. +In folder [PICRUSt2/](../PICRUSt2/) are predicted quantifications for Enzyme Classification numbers (EC), see +`EC_pred_metagenome_unstrat_descrip.tsv`, KEGG orthologs (KO), see `KO_pred_metagenome_unstrat_descrip.tsv`, MetaCyc ontology, +see `METACYC_path_abun_unstrat_descrip.tsv`. Quantifications are not normalized yet, they can be normalized e.g. by the total sum per sample. 
+")) ``` @@ -1315,12 +1427,14 @@ if ( !isFALSE(params$mqc_plot) ) { and a summary of non-default parameter in [MultiQC's Workflow Summary](../multiqc/multiqc_report.html#nf-core-ampliseq-summary).\n\n") } # with & without MultiQC -cat("Technical information to the pipeline run are collected in folder [pipeline_info](../pipeline_info), - including software versions collected at runtime in file `software_versions.yml` (can be viewed with a text editor), - execution report in file `execution_report_{date}_{time}.html`, - execution trace in file `execution_trace_{date}_{time}.txt`, - execution timeline in file `execution_timelime_{date}_{time}.html`, and - pipeline direct acyclic graph (DAG) in file `pipeline_dag_{date}_{time}.html`.") +cat(paste0(" +Technical information to the pipeline run are collected in folder [pipeline_info](../pipeline_info), +including software versions collected at runtime in file `software_versions.yml` (can be viewed with a text editor), +execution report in file `execution_report_{date}_{time}.html`, +execution trace in file `execution_trace_{date}_{time}.txt`, +execution timeline in file `execution_timelime_{date}_{time}.html`, and +pipeline direct acyclic graph (DAG) in file `pipeline_dag_{date}_{time}.html`. +")) ``` diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 0295d73a..4d33ff77 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -10,6 +10,7 @@ process SUMMARY_REPORT { path(report_template) path(report_styles) path(report_logo) + path(report_abstract) path(metadata) path(samplesheet) path(fasta) @@ -65,9 +66,11 @@ process SUMMARY_REPORT { // all elements must have a value, i.e. booleans also need to be set to TRUE def params_list_named = [ "css='$report_styles'", - "logo='$report_logo'", + "report_logo='$report_logo'", "workflow_manifest_version='${workflow.manifest.version}'", "workflow_scriptid='${workflow.scriptId.substring(0,10)}'", + params.report_title ? 
"report_title='$params.report_title'" : "", + report_abstract ? "report_abstract='$params.report_abstract'" : "", meta.single_end ? "flag_single_end=TRUE" : "", metadata ? "metadata='$metadata'" : "", samplesheet ? "samplesheet='$samplesheet'" : "", diff --git a/nextflow.config b/nextflow.config index 0c8f9353..bb5c0409 100644 --- a/nextflow.config +++ b/nextflow.config @@ -74,6 +74,8 @@ params { report_template = null report_css = null report_logo = null + report_title = "Summary of analysis results" + report_abstract = null // Skipping options skip_cutadapt = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 715052e6..854be337 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -516,6 +516,16 @@ "type": "string", "default": null, "description": "Path to logo file (png)" + }, + "report_title": { + "type": "string", + "default": "Summary of analysis results", + "description": "String used as report title" + }, + "report_abstract": { + "type": "string", + "default": null, + "description": "Path to Markdown file (md) that replaces the 'Abstract' section" } } }, diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 1a4d2970..651a3cf8 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -83,6 +83,7 @@ ch_report_css = params.report_css ? ch_report_logo = params.report_logo ? Channel.fromPath("${params.report_logo}", checkIfExists: true) : Channel.fromPath("$projectDir/assets/nf-core-ampliseq_logo_light_long.png") +ch_report_abstract = params.report_abstract ? Channel.fromPath(params.report_abstract, checkIfExists: true) : [] // Set non-params Variables @@ -686,6 +687,7 @@ workflow AMPLISEQ { ch_report_template, ch_report_css, ch_report_logo, + ch_report_abstract, ch_metadata.ifEmpty( [] ), params.input.toString().toLowerCase().endsWith("tsv") ? ch_input : [], // samplesheet input is_fasta_input ? 
PARSE_INPUT.out.fasta.ifEmpty( [] ) : [], // fasta input From 1cce75f410a080af7241e60d6c892416a4615a3e Mon Sep 17 00:00:00 2001 From: Till Englert Date: Fri, 4 Aug 2023 08:48:53 +0200 Subject: [PATCH 096/230] bugfix large sample numbers dada stats table --- assets/report_template.Rmd | 6 ------ 1 file changed, 6 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8ab387b6..69fa1fbe 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -483,9 +483,6 @@ if ( params$flag_single_end ) { samples_t <- c(rep(dada_stats_p$sample, 4)) steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoised", n_samples), rep("excluded by nonchim", n_samples), rep("reads in final ASVs", n_samples)) - # stack the column for absolute number of asvs - asvs_abs_t <- as.array(flatten_dbl(dada_stats_ex[3:6])) - dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) # stack the column for percentage of asvs asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:6])) dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) @@ -514,9 +511,6 @@ if ( params$flag_single_end ) { steps_t <- c(rep("excluded by filtering", n_samples), rep("excluded by denoisedF", n_samples), rep("excluded by denoisedR", n_samples), rep("excluded by merged", n_samples), rep("excluded by nonchim", n_samples), rep("reads in final ASVs", n_samples)) - # stack the column for absolute reads - asvs_abs_t <- as.array(flatten_dbl(dada_stats_ex[3:8])) - dada_stats_ex_t <- data.frame(samples_t, steps_t, asvs_abs_t) # stack the column for percentage of asvs asvs_p_t <- as.array(flatten_dbl(dada_stats_p[3:8])) dada_stats_p_t <- data.frame(samples_t, steps_t, asvs_p_t) From aa98524a51560d1d6310ab0050ca61b5ebf761d0 Mon Sep 17 00:00:00 2001 From: Till Englert Date: Fri, 4 Aug 2023 10:56:36 +0200 Subject: [PATCH 097/230] Change ordering of dada stats stacked bar charts --- assets/report_template.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git 
a/assets/report_template.Rmd b/assets/report_template.Rmd index 69fa1fbe..36e836e9 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -519,6 +519,7 @@ cat(":\n\n") # Plot dada_stats_p_t$steps_t <- factor(dada_stats_p_t$steps_t, levels=unique(dada_stats_p_t$steps_t)) +dada_stats_p_t$samples_t <- factor(dada_stats_p_t$samples_t, levels=dada_stats_p_t[order(dada_stats_p$analysis),"samples_t"]) plot_dada_stats_p_t <- ggplot(dada_stats_p_t, aes(fill = steps_t, y = asvs_p_t, x = samples_t)) + geom_bar(position = "fill", stat = "identity") + From c76c64bf357ede2413debc53fb4b147cd1d59cfb Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Mon, 7 Aug 2023 12:51:47 +0200 Subject: [PATCH 098/230] Apply suggestions from code review Co-authored-by: Till E. <64961761+tillenglert@users.noreply.github.com> --- assets/report_template.Rmd | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 36e836e9..c4cd4d04 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -174,7 +174,7 @@ if ( !isFALSE(params$metadata) ) { cat(paste0(" # Data input and Metadata -Pipeline input was saved in folder [input](../input). +Pipeline input was saved to the [input](../input) directory. ")) } else { cat(paste0(" @@ -358,7 +358,7 @@ if (params$flag_single_end) { ```{r, eval = !isFALSE(params$dada_qc_f_path), results='asis'} cat(paste0(" -Overall read quality profiles as heat map of the frequency of each quality score at each base position. +Overall read quality profiles are displayed as heat map of the frequency of each quality score at each base position. The mean quality score at each position is shown by the green line, and the quartiles of the quality score distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least that position. 
Original plots can be found [folder dada2/QC/](../dada2/QC/) with names that end in `_qual_stats.pdf`. @@ -414,7 +414,7 @@ knitr::include_graphics(dada_err_path) ```{r, eval = !isFALSE(params$dada_err_path), results='asis'} cat(paste0(" -Estimated error rates for each possible transition. The black line shows the estimated error rates after +Estimated error rates are displayed for each possible transition. The black line shows the estimated error rates after convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal definition of the Q-score. The estimated error rates (black line) should be a good fit to the observed rates (points), and the error rates should drop with increased quality. Original plots can be found in @@ -428,7 +428,7 @@ definition of the Q-score. The estimated error rates (black line) should be a go cat(paste0(" ## Read counts per sample -Tracking read numbers through DADA2 processing steps, for each sample. In the following table are read numbers after each processing stage. +Tracking read numbers through DADA2 processing steps for each sample. The following table shows the read numbers after each processing stage. ")) if ( params$flag_single_end ) { @@ -669,7 +669,7 @@ cat("The number of ASVs was reduced by",n_asv-filter_ssu_asv_filtered,"(",100-ro cat(paste0(" ## Sequence length -A length filter was used to reduce potential contamination after ASV computation. +A length filter was used to reduce potential contamination. 
Before filtering, ASVs had the following length profile (count of 1 was transformed to 1.5 to allow plotting on log10 scale): ")) @@ -731,7 +731,7 @@ filter_len_stats$'retained%' <- round( filter_len_stats$output / filter_len_stat filter_len_stats_avg_removed <- 100-sum(filter_len_stats$'retained%')/length(filter_len_stats$'retained%') filter_len_stats_max_removed <- 100-min(filter_len_stats$'retained%') -cat("The following table shows (read) counts for each sample before and after filtering:") +cat("The following table shows read counts for each sample before and after filtering:") # Display table datatable(filter_len_stats, options = list( @@ -854,7 +854,7 @@ cat("## DADA2\n") if (!params$flag_ref_tax_user) { cat("The taxonomic classification was performed by [DADA2](https://pubmed.ncbi.nlm.nih.gov/27214047/) using the database: `", params$dada2_ref_tax_title, "`. - More details can be found in the ['Methods section'](#methods).\n\n", sep = "") + More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") } else { cat("The taxonomic classification was performed by DADA2 using a custom database ", "provided by the user.\n\n", sep = "") @@ -949,7 +949,7 @@ cat("## QIIME2\n") cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using the database: `", params$qiime2_ref_tax_title, "`. - More details can be found in the ['Methods section'](#methods).\n\n", sep = "") + More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") @@ -1019,7 +1019,7 @@ cat("## SINTAX\n") cat("The taxonomic classification was performed by [SINTAX](https://doi.org/10.1101/074161) using the database: `", params$sintax_ref_tax_title, "`. 
- More details can be found in the ['Methods section'](#methods).\n\n", sep = "") + More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") asv_tax <- read.table(params$sintax_taxonomy, header = TRUE, sep = "\t") From 8b3d708d42324673dd84302919e4ef703644ebc5 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 7 Aug 2023 13:29:08 +0200 Subject: [PATCH 099/230] Implement mor suggestions from review --- CHANGELOG.md | 2 +- README.md | 2 +- assets/report_template.Rmd | 30 ++++++++++++++++-------------- docs/output.md | 6 +++--- modules/local/summary_report.nf | 8 ++++---- nextflow.config | 2 +- nextflow_schema.json | 2 +- 7 files changed, 27 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92622ed0..58798475 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#558](https://github.com/nf-core/ampliseq/pull/558) - Html report of results +- [#558](https://github.com/nf-core/ampliseq/pull/558) - Pipeline summary report ### `Changed` diff --git a/README.md b/README.md index b214cc61..f56e7b46 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ By default, the pipeline currently performs the following: - Excludes unwanted taxa, produces absolute and relative feature/taxa count tables and plots, plots alpha rarefaction curves, computes alpha and beta diversity indices and plots thereof ([QIIME2](https://www.nature.com/articles/s41587-019-0209-9)) - Calls differentially abundant taxa ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) - Pipeline QC summaries ([MultiQC](https://multiqc.info/)) -- Overall pipeline html report ([R Markdown](https://github.com/rstudio/rmarkdown)) +- Pipeline summary report ([R Markdown](https://github.com/rstudio/rmarkdown)) ## Usage diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index c4cd4d04..9700a981 100644 --- 
a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -24,7 +24,7 @@ params: workflow_scriptid: NULL # flags and arguments - flag_retain_untrimmed: TRUE + flag_retain_untrimmed: FALSE flag_ref_tax_user: FALSE flag_single_end: FALSE barplot: FALSE @@ -64,7 +64,7 @@ params: fasta: FALSE input: FALSE mqc_plot: FALSE - ca_sum_path: FALSE + cutadapt_summary: FALSE dada_filtntrim_args: FALSE dada_qc_f_path: FALSE dada_qc_r_path: "" @@ -239,7 +239,7 @@ knitr::include_graphics(params$mqc_plot) -```{r, eval = !isFALSE(params$ca_sum_path), results='asis'} +```{r, eval = !isFALSE(params$cutadapt_summary), results='asis'} cat(paste0(" ## Primer removal with Cutadapt @@ -250,25 +250,26 @@ the denoising tool or sequences might be lost due to being labelled as PCR chime ")) # import tsv -cutadapt_summary <- read.table(file = params$ca_sum_path, header = TRUE, sep = "\t") +cutadapt_summary <- read.table(file = params$cutadapt_summary, header = TRUE, sep = "\t") -passed_col <- as.numeric(substr( +cutadapt_passed_col <- as.numeric(substr( cutadapt_summary$cutadapt_passing_filters_percent, 1, 4)) -max_disc <- round( 100 - min(passed_col), 1 ) -avg_passed <- round(mean(passed_col),1) +cutadapt_max_discarded <- round( 100 - min(cutadapt_passed_col), 1 ) +cutadapt_avg_passed <- round(mean(cutadapt_passed_col),1) cutadapt_text_unch <- "Primers were trimmed using cutadapt" cutadapt_text_ch <- paste0(" and all untrimmed sequences were discarded. ", "Sequences that did not contain primer sequences were considered artifacts. Less than ", - max_disc, "% of the sequences were discarded per sample and a mean of ", - avg_passed, "% of the sequences per sample passed the filtering.") + cutadapt_max_discarded, "% of the sequences were discarded per sample and a mean of ", + cutadapt_avg_passed, "% of the sequences per sample passed the filtering. 
") -if (!params$flag_retain_untrimmed) cutadapt_text <- paste0( +if ( isFALSE(params$flag_retain_untrimmed) ) cutadapt_text <- paste0( cutadapt_text_unch, cutadapt_text_ch - ) else cutadapt_text <- paste0(cutadapt_text_unch, ".") + ) else cutadapt_text <- paste0(cutadapt_text_unch, ". ") cat(cutadapt_text) +cat("Cutadapt results can be found in folder [cutadapt](../cutadapt).") # shorten header by "cutadapt_" to optimize visualisation colnames(cutadapt_summary) <- gsub("cutadapt_","",colnames(cutadapt_summary)) @@ -277,8 +278,6 @@ datatable(cutadapt_summary, options = list( scrollX = TRUE, scrollY = "300px", paging = FALSE)) - -cat("\n\nCutadapt results can be found in folder [cutadapt](../cutadapt).") ``` @@ -472,6 +471,7 @@ if ( params$flag_single_end ) { nonchim = dada_stats$denoised-dada_stats$nonchim, analysis = dada_stats$nonchim) dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:6]/dada_stats_ex$input*100, 2)) + dada_stats_p_analysis_average <- round(sum(dada_stats_p$analysis)/length(dada_stats_p$analysis), 1) # If more than 20 sample only display subset! if ( nrow(dada_stats_p)>=20 ) { cat(" (display 10 samples of each lowest and highest percentage of reads analysed, of",nrow(dada_stats_p),"samples)") @@ -499,6 +499,7 @@ if ( params$flag_single_end ) { nonchim = dada_stats$merged-dada_stats$nonchim, analysis = dada_stats$nonchim) dada_stats_p <- data.frame(sample = dada_stats_ex$sample, round(dada_stats_ex[2:8]/dada_stats_ex$DADA2_input*100, 2)) + dada_stats_p_analysis_average <- round(sum(dada_stats_p$analysis)/length(dada_stats_p$analysis), 1) # If more than 20 sample only display subset! 
if ( nrow(dada_stats_p)>=20 ) { cat(" (display 10 samples of each lowest and highest percentage of reads analysed, of",nrow(dada_stats_p),"samples)") @@ -535,7 +536,8 @@ invisible(dev.off()) cat(paste0(" -Between ",min(dada_stats_p$analysis),"% and ",max(dada_stats_p$analysis),"% reads per sample were retained for analysis within DADA2 steps. +Between ",min(dada_stats_p$analysis),"% and ",max(dada_stats_p$analysis),"% reads per sample (average ",dada_stats_p_analysis_average,"%) +were retained for analysis within DADA2 steps. The proportion of lost reads per processing stage and sample should not be too high, totalling typically <50%. Samples that are very different in lost reads (per stage) to the majority of samples must be compared with caution, because an unusual problem diff --git a/docs/output.md b/docs/output.md index d206306c..0a407aa3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,7 +17,7 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [Input](#input) - Input files -- [Html summary](#html-summary) - Overview of pipeline output +- [Pipeline summary report](#pipeline-summary-report) - Overview of pipeline output - [Preprocessing](#preprocessing) - [FastQC](#fastqc) - Read quality control - [Cutadapt](#cutadapt) - Primer trimming @@ -59,7 +59,7 @@ Samplesheet, ASV fasta, and metadata file are copied into the results folder. -### Html summary +### Pipeline summary report A summary report for most pipeline results in html format produced by [R Markdown](https://rmarkdown.rstudio.com/). The report gives a general overview of the analysis, includes many tables and visualizations, and links to interactive downstream analysis results, if available. 
@@ -67,7 +67,7 @@ A summary report for most pipeline results in html format produced by [R Markdow Output files - `summary_report/` - - `summary_report.html`: a standalone HTML file that can be viewed in your web browser. + - `summary_report.html`: pipeline summary report as standalone HTML file that can be viewed in your web browser. - `*.svg*`: plots that were produced for (and are included in) the report. - `versions.yml`: software versions used to produce this report. diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 4d33ff77..8af605c6 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -15,7 +15,7 @@ process SUMMARY_REPORT { path(samplesheet) path(fasta) path(mqc_plots) - path(ca_summary) + path(cutadapt_summary) val(find_truncation_values) path(dada_filtntrim_args) path(dada_qual_stats) @@ -77,9 +77,9 @@ process SUMMARY_REPORT { fasta ? "fasta='$fasta'" : "", !fasta && !samplesheet ? "input='$params.input'" : "", mqc_plots ? "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'" : "", - ca_summary ? - params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,ca_sum_path='$ca_summary'" : - "ca_sum_path='$ca_summary'" : "", + cutadapt_summary ? + params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,cutadapt_summary='$cutadapt_summary'" : + "cutadapt_summary='$cutadapt_summary'" : "", find_truncation_values ? 
"trunc_qmin=$params.trunc_qmin,trunc_rmin=$params.trunc_rmin" : "", "trunclenf='$params.trunclenf'", "trunclenr='$params.trunclenr'", diff --git a/nextflow.config b/nextflow.config index bb5c0409..79f5cb64 100644 --- a/nextflow.config +++ b/nextflow.config @@ -93,7 +93,7 @@ params { skip_diversity_indices = false skip_ancom = false skip_multiqc = false - skip_report = false + skip_report = false // Database options dada_ref_taxonomy = "silva=138" diff --git a/nextflow_schema.json b/nextflow_schema.json index 854be337..f1e9b12d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -497,7 +497,7 @@ } }, "pipeline_report": { - "title": "Pipeline report", + "title": "Pipeline summary report", "type": "object", "description": "", "default": "", From 46b47da93559c6707b9b108977182eda43400d43 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 7 Aug 2023 13:51:17 +0200 Subject: [PATCH 100/230] move report defaults to nextflow.config --- nextflow.config | 6 +++--- nextflow_schema.json | 6 +++--- workflows/ampliseq.nf | 12 +++--------- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/nextflow.config b/nextflow.config index 79f5cb64..4c3ea6d5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,9 +71,9 @@ params { ancom_sample_min_count = 1 // Report options - report_template = null - report_css = null - report_logo = null + report_template = "${projectDir}/assets/report_template.Rmd" + report_css = "${projectDir}/assets/nf-core_style.css" + report_logo = "${projectDir}/assets/nf-core-ampliseq_logo_light_long.png" report_title = "Summary of analysis results" report_abstract = null diff --git a/nextflow_schema.json b/nextflow_schema.json index f1e9b12d..33642f33 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -504,17 +504,17 @@ "properties": { "report_template": { "type": "string", - "default": null, + "default": "${projectDir}/assets/report_template.Rmd", "description": "Path to Markdown file (Rmd)" }, "report_css": { "type": 
"string", - "default": null, + "default": "${projectDir}/assets/nf-core_style.css", "description": "Path to style file (css)" }, "report_logo": { "type": "string", - "default": null, + "default": "${projectDir}/assets/nf-core-ampliseq_logo_light_long.png", "description": "Path to logo file (png)" }, "report_title": { diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 651a3cf8..bfe51b27 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -74,15 +74,9 @@ if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { } // report sources -ch_report_template = params.report_template ? - Channel.fromPath("${params.report_template}", checkIfExists: true) : - Channel.fromPath("$projectDir/assets/report_template.Rmd") -ch_report_css = params.report_css ? - Channel.fromPath("${params.report_css}", checkIfExists: true) : - Channel.fromPath("$projectDir/assets/nf-core_style.css") -ch_report_logo = params.report_logo ? - Channel.fromPath("${params.report_logo}", checkIfExists: true) : - Channel.fromPath("$projectDir/assets/nf-core-ampliseq_logo_light_long.png") +ch_report_template = Channel.fromPath("${params.report_template}", checkIfExists: true) +ch_report_css = Channel.fromPath("${params.report_css}", checkIfExists: true) +ch_report_logo = Channel.fromPath("${params.report_logo}", checkIfExists: true) ch_report_abstract = params.report_abstract ? 
Channel.fromPath(params.report_abstract, checkIfExists: true) : [] // Set non-params Variables From 74c5fa5c432827454c4d433373e37f1c6aff159c Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 7 Aug 2023 14:10:07 +0200 Subject: [PATCH 101/230] add more details to ITSx output --- assets/report_template.Rmd | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 9700a981..2b7aed3d 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -795,10 +795,15 @@ cat("# Taxonomic Classification\n") cat(paste0(" ## ITS regions -The ITS region was extracted from each ASV sequence using [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073). -Taxonomic classification should have improved performance based on extracted ITS sequence. +The ",params$cut_its," region was extracted from each ASV sequence using [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073). +Taxonomic classification should have improved performance based on extracted ITS sequence. ITSx results can be found in folder [itsx](../itsx). -The extracted ITS region is",params$cut_its,"sequence. +Taxonomies per extracted region were then transferred back to the full ASV sequence. No filtering was done based on whether the region was found or not. +Those taxonomic classifications per ASV can be found in files `ASV_tax.tsv` and `ASV_tax_species.tsv` in folder [dada2/](../dada2/). + +However, the files `ASV_ITS_tax.tsv` and `ASV_ITS_tax_species.tsv` in folder [dada2/](../dada2/) contain only the chosen ITS part of just the ASVs where the region was found. +Of course, different ASVs may contain identical ",params$cut_its," regions, leading to identical taxonomy assignments, +but the full ASVs were recorded as separate entries anyway to retain maximum resolution at this stage. 
")) # Read ITSX summary @@ -843,8 +848,6 @@ plot_itsx_origins svg("itsx_preliminary_origin.svg") plot_itsx_origins invisible(dev.off()) - -cat("\n\nITSx results can be found in folder [itsx](../itsx).") ``` From 167db88426a5f15f66160a3be485939db57d155f Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 7 Aug 2023 16:32:28 +0200 Subject: [PATCH 102/230] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c52dae40..d66c2116 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#605](https://github.com/nf-core/ampliseq/pull/605) - Make `--sbdiexport` compatible with PR2 version 5.0.0 +- [#614](https://github.com/nf-core/ampliseq/pull/614) - Template update for nf-core/tools version 2.9 ### `Dependencies` From 6bb23df0b3064db356d0027a6f685c909213cf18 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 7 Aug 2023 16:35:04 +0200 Subject: [PATCH 103/230] update QIIME2 container names --- modules/local/qiime2_alphararefaction.nf | 2 +- modules/local/qiime2_ancom_asv.nf | 2 +- modules/local/qiime2_ancom_tax.nf | 2 +- modules/local/qiime2_barplot.nf | 2 +- modules/local/qiime2_classify.nf | 2 +- modules/local/qiime2_diversity_adonis.nf | 2 +- modules/local/qiime2_diversity_alpha.nf | 2 +- modules/local/qiime2_diversity_beta.nf | 2 +- modules/local/qiime2_diversity_betaord.nf | 2 +- modules/local/qiime2_diversity_core.nf | 2 +- modules/local/qiime2_export_absolute.nf | 2 +- modules/local/qiime2_export_relasv.nf | 2 +- modules/local/qiime2_export_reltax.nf | 2 +- modules/local/qiime2_extract.nf | 2 +- modules/local/qiime2_featuretable_group.nf | 2 +- modules/local/qiime2_filtersamples.nf | 2 +- modules/local/qiime2_filtertaxa.nf | 2 +- modules/local/qiime2_inasv.nf | 2 +- modules/local/qiime2_inseq.nf | 2 +- modules/local/qiime2_intax.nf | 2 +- modules/local/qiime2_intree.nf | 2 +- modules/local/qiime2_train.nf | 
2 +- modules/local/qiime2_tree.nf | 2 +- 23 files changed, 23 insertions(+), 23 deletions(-) diff --git a/modules/local/qiime2_alphararefaction.nf b/modules/local/qiime2_alphararefaction.nf index 9d656840..9ff9c782 100644 --- a/modules/local/qiime2_alphararefaction.nf +++ b/modules/local/qiime2_alphararefaction.nf @@ -1,7 +1,7 @@ process QIIME2_ALPHARAREFACTION { label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_ancom_asv.nf b/modules/local/qiime2_ancom_asv.nf index 322b414e..165ca45f 100644 --- a/modules/local/qiime2_ancom_asv.nf +++ b/modules/local/qiime2_ancom_asv.nf @@ -5,7 +5,7 @@ process QIIME2_ANCOM_ASV { label 'process_long' label 'error_ignore' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf index 9f5392ef..717e7286 100644 --- a/modules/local/qiime2_ancom_tax.nf +++ b/modules/local/qiime2_ancom_tax.nf @@ -3,7 +3,7 @@ process QIIME2_ANCOM_TAX { label 'process_medium' label 'single_cpu' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_barplot.nf b/modules/local/qiime2_barplot.nf index 3e83ab02..bb0c8aeb 100644 --- a/modules/local/qiime2_barplot.nf +++ b/modules/local/qiime2_barplot.nf @@ -1,7 +1,7 @@ process QIIME2_BARPLOT { label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / 
-profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_classify.nf b/modules/local/qiime2_classify.nf index f5a4824d..c32fff03 100644 --- a/modules/local/qiime2_classify.nf +++ b/modules/local/qiime2_classify.nf @@ -2,7 +2,7 @@ process QIIME2_CLASSIFY { tag "${repseq},${trained_classifier}" label 'process_high' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_diversity_adonis.nf b/modules/local/qiime2_diversity_adonis.nf index 25bc95f8..78b15dd3 100644 --- a/modules/local/qiime2_diversity_adonis.nf +++ b/modules/local/qiime2_diversity_adonis.nf @@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_ADONIS { tag "${core.baseName} - ${formula}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_diversity_alpha.nf b/modules/local/qiime2_diversity_alpha.nf index dff59e3e..ae1db546 100644 --- a/modules/local/qiime2_diversity_alpha.nf +++ b/modules/local/qiime2_diversity_alpha.nf @@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_ALPHA { tag "${core.baseName}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_diversity_beta.nf b/modules/local/qiime2_diversity_beta.nf index f6fc5ee7..8f73ff2c 100644 --- a/modules/local/qiime2_diversity_beta.nf +++ b/modules/local/qiime2_diversity_beta.nf @@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_BETA { tag "${core.baseName} - ${category}" 
label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_diversity_betaord.nf b/modules/local/qiime2_diversity_betaord.nf index 7b2699a4..aba4afa8 100644 --- a/modules/local/qiime2_diversity_betaord.nf +++ b/modules/local/qiime2_diversity_betaord.nf @@ -2,7 +2,7 @@ process QIIME2_DIVERSITY_BETAORD { tag "${core.baseName}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_diversity_core.nf b/modules/local/qiime2_diversity_core.nf index 99fe9280..52cb1e6f 100644 --- a/modules/local/qiime2_diversity_core.nf +++ b/modules/local/qiime2_diversity_core.nf @@ -1,7 +1,7 @@ process QIIME2_DIVERSITY_CORE { label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_export_absolute.nf b/modules/local/qiime2_export_absolute.nf index 624547d5..9bfe0d0a 100644 --- a/modules/local/qiime2_export_absolute.nf +++ b/modules/local/qiime2_export_absolute.nf @@ -1,7 +1,7 @@ process QIIME2_EXPORT_ABSOLUTE { label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_export_relasv.nf b/modules/local/qiime2_export_relasv.nf index a5b81388..9ed1b322 100644 --- a/modules/local/qiime2_export_relasv.nf +++ 
b/modules/local/qiime2_export_relasv.nf @@ -1,7 +1,7 @@ process QIIME2_EXPORT_RELASV { label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_export_reltax.nf b/modules/local/qiime2_export_reltax.nf index 8f090b07..ea2cf21a 100644 --- a/modules/local/qiime2_export_reltax.nf +++ b/modules/local/qiime2_export_reltax.nf @@ -1,7 +1,7 @@ process QIIME2_EXPORT_RELTAX { label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_extract.nf b/modules/local/qiime2_extract.nf index 6f686906..3a10c107 100644 --- a/modules/local/qiime2_extract.nf +++ b/modules/local/qiime2_extract.nf @@ -3,7 +3,7 @@ process QIIME2_EXTRACT { label 'process_low' label 'single_cpu' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_featuretable_group.nf b/modules/local/qiime2_featuretable_group.nf index 71e9a9b2..44bcfaae 100644 --- a/modules/local/qiime2_featuretable_group.nf +++ b/modules/local/qiime2_featuretable_group.nf @@ -2,7 +2,7 @@ process QIIME2_FEATURETABLE_GROUP { tag "${category}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_filtersamples.nf b/modules/local/qiime2_filtersamples.nf index 6a4a7310..4bdd7e39 100644 --- 
a/modules/local/qiime2_filtersamples.nf +++ b/modules/local/qiime2_filtersamples.nf @@ -2,7 +2,7 @@ process QIIME2_FILTERSAMPLES { tag "${filter}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_filtertaxa.nf b/modules/local/qiime2_filtertaxa.nf index 0a25803e..1f26ab10 100644 --- a/modules/local/qiime2_filtertaxa.nf +++ b/modules/local/qiime2_filtertaxa.nf @@ -2,7 +2,7 @@ process QIIME2_FILTERTAXA { tag "taxa:${exclude_taxa};min-freq:${min_frequency};min-samples:${min_samples}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_inasv.nf b/modules/local/qiime2_inasv.nf index 348aea87..aea70bb7 100644 --- a/modules/local/qiime2_inasv.nf +++ b/modules/local/qiime2_inasv.nf @@ -2,7 +2,7 @@ process QIIME2_INASV { tag "${asv}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_inseq.nf b/modules/local/qiime2_inseq.nf index a0504053..0cc3aca8 100644 --- a/modules/local/qiime2_inseq.nf +++ b/modules/local/qiime2_inseq.nf @@ -2,7 +2,7 @@ process QIIME2_INSEQ { tag "${seq}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_intax.nf b/modules/local/qiime2_intax.nf index 
0e6c69e1..4f35daed 100644 --- a/modules/local/qiime2_intax.nf +++ b/modules/local/qiime2_intax.nf @@ -2,7 +2,7 @@ process QIIME2_INTAX { tag "${tax}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_intree.nf b/modules/local/qiime2_intree.nf index f9f35b97..620e74c0 100644 --- a/modules/local/qiime2_intree.nf +++ b/modules/local/qiime2_intree.nf @@ -2,7 +2,7 @@ process QIIME2_INTREE { tag "${meta.id}:${meta.model}" label 'process_low' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_train.nf b/modules/local/qiime2_train.nf index 254118f8..289fd6a6 100644 --- a/modules/local/qiime2_train.nf +++ b/modules/local/qiime2_train.nf @@ -3,7 +3,7 @@ process QIIME2_TRAIN { label 'process_high' label 'single_cpu' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/qiime2_tree.nf b/modules/local/qiime2_tree.nf index 5fc32fed..c870842f 100644 --- a/modules/local/qiime2_tree.nf +++ b/modules/local/qiime2_tree.nf @@ -1,7 +1,7 @@ process QIIME2_TREE { label 'process_medium' - container "quay.io/qiime2/core:2022.11" + container "qiime2/core:2022.11" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { From 22bef2a979ee38af924c3757b8996e88c3ea0b8e Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Tue, 8 Aug 2023 10:53:07 +0800 Subject: [PATCH 104/230] Added 
phyloseq object creation --- CITATIONS.md | 4 ++ bin/reformat_tax_for_phyloseq.py | 32 ++++++++++++++ conf/modules.config | 8 ++++ docs/output.md | 13 ++++++ modules/local/phyloseq.nf | 59 ++++++++++++++++++++++++++ modules/local/phyloseq_inasv.nf | 28 ++++++++++++ modules/local/phyloseq_intax.nf | 29 +++++++++++++ tests/pipeline/iontorrent.nf.test | 3 +- tests/pipeline/multi.nf.test | 3 +- tests/pipeline/pacbio_its.nf.test | 3 +- tests/pipeline/pplace.nf.test | 4 +- tests/pipeline/reftaxcustom.nf.test | 3 +- tests/pipeline/single.nf.test | 3 +- tests/pipeline/sintax.nf.test | 3 +- tests/pipeline/test.nf.test | 4 +- workflows/ampliseq.nf | 66 +++++++++++++++++++++++++++-- 16 files changed, 254 insertions(+), 11 deletions(-) create mode 100755 bin/reformat_tax_for_phyloseq.py create mode 100644 modules/local/phyloseq.nf create mode 100644 modules/local/phyloseq_inasv.nf create mode 100644 modules/local/phyloseq_intax.nf diff --git a/CITATIONS.md b/CITATIONS.md index e488e7bd..44ef54bb 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -109,6 +109,10 @@ > Jari Oksanen, F. Guillaume Blanchet, Michael Friendly, Roeland Kindt, Pierre Legendre, Dan McGlinn, Peter R. Minchin, R. B. O’Hara, Gavin L. Simpson, Peter Solymos, M. Henry H. Stevens, Eduard Szoecs, and Helene Wagner. vegan: Community Ecology Package. 2018. R package version 2.5-3. +- [Phyloseq](https://doi.org/10.1371/journal.pone.0061217) + + > McMurdie PJ, Holmes S (2013). “phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data.” PLoS ONE, 8(4), e61217. 
+ ### Non-default tools - [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073) diff --git a/bin/reformat_tax_for_phyloseq.py b/bin/reformat_tax_for_phyloseq.py new file mode 100755 index 00000000..9a3281fb --- /dev/null +++ b/bin/reformat_tax_for_phyloseq.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import pandas as pd +import sys + +tax_file = sys.argv[1] +out_file = sys.argv[2] + +# Import tsv file +tax_df = pd.read_csv(tax_file, sep="\t") + +# The second column should hold the taxonomy information +tax_col = tax_df.columns[1] + +# Split the values in the tax column +split_tax = tax_df[tax_col].str.split(';', expand=True) + +# Assign names to the new columns with an auto incrementing integer +new_col_names = [f'{tax_col}_{i+1}' for i in range(split_tax.shape[1])] +split_tax.columns = new_col_names + +# Strip whitespace from the tax names +split_tax = split_tax.applymap(lambda x: x.strip() if isinstance(x, str) else x) + +# Drop the original tax column +tax_df = tax_df.drop(columns=[tax_col]) + +# Add the new tax columns to the df +result = pd.concat([tax_df, split_tax], axis=1) + +# Create new tsv file +result.to_csv(out_file, sep='\t', index=False) diff --git a/conf/modules.config b/conf/modules.config index 95d8569a..c431e4e0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -785,6 +785,14 @@ process { ] } + withName: 'PHYLOSEQ' { + publishDir = [ + path: { "${params.outdir}/phyloseq" }, + mode: params.publish_dir_mode, + pattern: "*.rds" + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/docs/output.md b/docs/output.md index d3d37beb..305e578a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -41,6 +41,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Diversity analysis](#diversity-analysis) - High level overview with different diversity indices - [ANCOM](#ancom) - Differential abundance analysis - 
[PICRUSt2](#picrust2) - Predict the functional potential of a bacterial community +- [Phyloseq](#phyloseq) - Phyloseq R objects - [Read count report](#read-count-report) - Report of read counts during various steps of the pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -518,6 +519,18 @@ Most of the fields in the template will not be populated by the export process, +### Phyloseq + +This directory will hold phyloseq objects for each taxonomy table produced by this pipeline. The objects will contain an ASV abundance table and a taxonomy table. If the pipeline is provided with metadata, that metadata will also be included in the phyloseq object. A phylogenetic tree will also be included if the pipeline produces a tree. + +
    +Output files + +- `phyloseq/` + - `*_phyloseq.rds`: Phyloseq R object. + + 
    + ## Read count report This report includes information on how many reads per sample passed each pipeline step in which a loss can occur. Specifically, how many read pairs entered cutadapt, were reverse complemented, passed trimming; how many read pairs entered DADA2, were denoised, merged and non-chimeric; and how many counts were lost during excluding unwanted taxa and removing low abundance/prevalence sequences in QIIME2. diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf new file mode 100644 index 00000000..6e5923e9 --- /dev/null +++ b/modules/local/phyloseq.nf @@ -0,0 +1,59 @@ +process PHYLOSEQ { + tag "$prefix" + label 'process_low' + + conda "bioconda::bioconductor-phyloseq=1.44.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' : + 'quay.io/biocontainers/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' }" + + input: + tuple val(prefix), path(tax_tsv) + path otu_tsv + path sam_tsv + path tree + + output: + tuple val(prefix), path("*phyloseq.rds"), emit: rds + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def sam_tsv = "\"${sam_tsv}\"" + def otu_tsv = "\"${otu_tsv}\"" + def tax_tsv = "\"${tax_tsv}\"" + def tree = "\"${tree}\"" + def prefix = "\"${prefix}\"" + """ + #!/usr/bin/env Rscript + + suppressPackageStartupMessages(library(phyloseq)) + + otu_df <- read.table($otu_tsv, sep="\\t", header=TRUE, row.names=1) + tax_df <- read.table($tax_tsv, sep="\\t", header=TRUE, row.names=1) + otu_mat <- as.matrix(otu_df) + tax_mat <- as.matrix(tax_df) + + OTU <- otu_table(otu_mat, taxa_are_rows=TRUE) + TAX <- tax_table(tax_mat) + phy_obj <- phyloseq(OTU, TAX) + + if (file.exists($sam_tsv)) { + sam_df <- read.table($sam_tsv, sep="\\t", header=TRUE, row.names=1) + SAM <- sample_data(sam_df) + phy_obj <- merge_phyloseq(phy_obj, SAM) + } + + if 
(file.exists($tree)) { + TREE <- read_tree($tree) + phy_obj <- merge_phyloseq(phy_obj, TREE) + } + + saveRDS(phy_obj, file = paste0($prefix, "_phyloseq.rds")) + + # Version information + writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),paste0(" phyloseq: ", packageVersion("phyloseq"))), "versions.yml") + """ +} \ No newline at end of file diff --git a/modules/local/phyloseq_inasv.nf b/modules/local/phyloseq_inasv.nf new file mode 100644 index 00000000..f66d1669 --- /dev/null +++ b/modules/local/phyloseq_inasv.nf @@ -0,0 +1,28 @@ +process PHYLOSEQ_INASV { + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path(biom_file) + + output: + path( "*.tsv" ) , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + tail $biom_file -n +2 | sed '1s/#OTU ID/ASV_ID/' > reformat_$biom_file + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') + END_VERSIONS + """ +} diff --git a/modules/local/phyloseq_intax.nf b/modules/local/phyloseq_intax.nf new file mode 100644 index 00000000..6dbd8487 --- /dev/null +++ b/modules/local/phyloseq_intax.nf @@ -0,0 +1,29 @@ +process PHYLOSEQ_INTAX { + label 'process_low' + + conda "conda-forge::pandas=1.1.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pandas:1.1.5': + 'biocontainers/pandas:1.1.5' }" + + input: + path(tax_tsv) + + output: + path( "*.tsv" ) , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + reformat_tax_for_phyloseq.py $tax_tsv reformat_$tax_tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + pandas: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('pandas').version)") + END_VERSIONS + """ +} diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index 9b73af86..a4a16631 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,7 +38,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index e4fe28a0..75e2e374 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -63,7 +63,8 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() }, { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), - path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") } + path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, + { assert new 
File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index 39e1d2a2..144db928 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -52,7 +52,8 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index b78c479b..9c4b1806 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -55,7 +55,9 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() }, { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 42e0d104..19035ccb 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -43,7 +43,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - 
path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index be236c9a..1aa634a0 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -44,7 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index f6de2995..fb0c8c15 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -65,7 +65,8 @@ nextflow_pipeline { { assert new File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() }, { assert new File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index 7b295941..e8ba0ce0 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -93,7 +93,9 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new 
File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 03e5bf55..025c8d2a 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -165,6 +165,10 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax' include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' +include { PHYLOSEQ } from '../modules/local/phyloseq' +include { PHYLOSEQ_INASV } from '../modules/local/phyloseq_inasv' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -456,7 +460,7 @@ workflow AMPLISEQ { } FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) - + ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv } else { ch_pplace_tax = Channel.empty() @@ -477,7 +481,7 @@ workflow AMPLISEQ { ch_qiime_classifier ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here to a warning - } + } // // SUBWORKFLOW / MODULES : Downstream analysis with QIIME2 @@ -597,7 +601,7 @@ workflow AMPLISEQ { tax_agglom_max ) } - } + } // // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 @@ -627,6 +631,62 @@ workflow AMPLISEQ { ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first()) } + // + // MODULE: Create 
phyloseq objects + // + if ( !params.skip_taxonomy ) { + if ( params.metadata ) { + ch_phyloseq_inmeta = ch_metadata.first() // The .first() is to make sure it's a value channel + } else { + ch_phyloseq_inmeta = [] + } + + ch_phyloseq_intax = Channel.empty() + if ( !params.skip_dada_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_dada2_tax.map { it = [ "dada2", file(it) ] } + ) + } + + if ( params.sintax_ref_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_sintax_tax.map { it = [ "sintax", file(it) ] } + ) + } + + if ( params.pplace_tree ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_PPLACE ( + ch_pplace_tax + ).tsv.map { it = [ "pplace", file(it) ] } + ) + + ch_phyloseq_intree = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny.map { it = it[1] }.first() + } else { + ch_phyloseq_intree = [] + } + + if ( run_qiime2 ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_QIIME2 ( + QIIME2_TAXONOMY.out.tsv + ).tsv.map { it = [ "qiime2", file(it) ] } + ) + + if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { + ch_phyloseq_inasv = PHYLOSEQ_INASV ( QIIME2_FILTERTAXA.out.tsv ).tsv + + } else { + ch_phyloseq_inasv = ch_dada2_asv + } + } else { + ch_phyloseq_inasv = ch_dada2_asv + } + + PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + ch_versions = ch_versions.mix(PHYLOSEQ.out.versions.first()) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From 44879f157bc14ab4231a28b613583efb35d83afd Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Tue, 8 Aug 2023 11:21:33 +0800 Subject: [PATCH 105/230] updated README.md to mention phyloseq object creation --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 56e499a3..8344d847 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ By default, the pipeline currently performs the following: - 
Taxonomical classification using DADA2, [SINTAX](https://doi.org/10.1101/074161) or [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - Excludes unwanted taxa, produces absolute and relative feature/taxa count tables and plots, plots alpha rarefaction curves, computes alpha and beta diversity indices and plots thereof ([QIIME2](https://www.nature.com/articles/s41587-019-0209-9)) - Calls differentially abundant taxa ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) +- Creates phyloseq R objects ([Phyloseq](https://www.bioconductor.org/packages/release/bioc/html/phyloseq.html)) - Overall pipeline run summaries ([MultiQC](https://multiqc.info/)) ## Usage From 51c2fcf44a9bd76482557926554beb1b09083925 Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:15:45 +0800 Subject: [PATCH 106/230] Update workflows/ampliseq.nf Co-authored-by: Daniel Lundin --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 025c8d2a..9d7d58ad 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -601,7 +601,7 @@ workflow AMPLISEQ { tax_agglom_max ) } - } + } // // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 From 9e2f218802c19e17a440ff5736f8d4e13ff492e8 Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:16:02 +0800 Subject: [PATCH 107/230] Update workflows/ampliseq.nf Co-authored-by: Daniel Lundin --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9d7d58ad..1ed6d159 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -481,7 +481,7 @@ workflow AMPLISEQ { ch_qiime_classifier ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here 
to a warning - } + } // // SUBWORKFLOW / MODULES : Downstream analysis with QIIME2 From efbb04323f585a8d9611d34797fee66a6b598344 Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:16:15 +0800 Subject: [PATCH 108/230] Update workflows/ampliseq.nf Co-authored-by: Daniel Lundin --- workflows/ampliseq.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 1ed6d159..a0b0a9f7 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -460,7 +460,6 @@ workflow AMPLISEQ { } FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) - ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv } else { ch_pplace_tax = Channel.empty() From 908f6554c1337eb9fe0e7dab0246fae27aad188b Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:16:26 +0800 Subject: [PATCH 109/230] Update modules/local/phyloseq.nf Co-authored-by: Daniel Lundin --- modules/local/phyloseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index 6e5923e9..3df9c6db 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -24,8 +24,8 @@ process PHYLOSEQ { def sam_tsv = "\"${sam_tsv}\"" def otu_tsv = "\"${otu_tsv}\"" def tax_tsv = "\"${tax_tsv}\"" - def tree = "\"${tree}\"" - def prefix = "\"${prefix}\"" + def tree = "\"${tree}\"" + def prefix = "\"${prefix}\"" """ #!/usr/bin/env Rscript From d92dce2abc502c6a1b77eac53f71f844e9ff6ea5 Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Tue, 8 Aug 2023 09:04:26 +0200 Subject: [PATCH 110/230] Typo in nextflow_schema.json Co-authored-by: Daniel Lundin --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json 
b/nextflow_schema.json index 6fd2a0e4..5fc118fc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -678,7 +678,7 @@ "description": "Validation of parameters fails when an unrecognised parameter is found.", "default": false, "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warninig." + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." }, "validationLenientMode": { "type": "boolean", From 26a1f1144c667a825f72557f5d19238d9944c46d Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 8 Aug 2023 11:22:46 +0200 Subject: [PATCH 111/230] fix input --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 4f751d6f..4a971395 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -684,7 +684,7 @@ workflow AMPLISEQ { ch_report_logo, ch_report_abstract, ch_metadata.ifEmpty( [] ), - params.input.toString().toLowerCase().endsWith("tsv") ? ch_input : [], // samplesheet input + params.input.toString().toLowerCase().endsWith("tsv") ? file(params.input) : [], // samplesheet input is_fasta_input ? PARSE_INPUT.out.fasta.ifEmpty( [] ) : [], // fasta input !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? 
CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], From e24346a7c0beba93878aee305fb292a4b80ffb71 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 8 Aug 2023 15:05:12 +0200 Subject: [PATCH 112/230] activate nf-validation for samplesheet --- CHANGELOG.md | 2 + assets/schema_input.json | 38 +++--- conf/test_fasta.config | 2 +- nextflow.config | 2 + nextflow_schema.json | 21 +++- subworkflows/local/parse_input.nf | 201 ++++++++---------------------- workflows/ampliseq.nf | 63 ++++++++-- 7 files changed, 144 insertions(+), 185 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d66c2116..ee029c5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#TODO](https://github.com/nf-core/ampliseq/pull/TODO) - `--input` was split into three params: (1) `--input` for samplesheet, (2) `--input_fasta` for ASV/OTU fasta input, (3) `--input_folder` direct FASTQ input + ### `Fixed` - [#605](https://github.com/nf-core/ampliseq/pull/605) - Make `--sbdiexport` compatible with PR2 version 5.0.0 diff --git a/assets/schema_input.json b/assets/schema_input.json index 5b22676f..8ff6422a 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,30 +7,34 @@ "items": { "type": "object", "properties": { - "sample": { + "sampleID": { "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "pattern": "^[a-zA-Z][a-zA-Z0-9_]+$", + "errorMessage": "Sample name must be provided, must start with a letter, and can only contain letters, numbers or underscores; Regex: '^[a-zA-Z][a-zA-Z0-9_]+$'", + "meta": ["id"] }, - "fastq_1": { + "forwardReads": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, - "fastq_2": { - 
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "reverseReads": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "run": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Run name cannot contain spaces", + "meta": ["run"], + "default": "1" } }, - "required": ["sample", "fastq_1"] + "required": ["sampleID", "forwardReads"] } } diff --git a/conf/test_fasta.config b/conf/test_fasta.config index 78babb74..6d1ee618 100644 --- a/conf/test_fasta.config +++ b/conf/test_fasta.config @@ -20,7 +20,7 @@ params { max_time = '6.h' // Input data - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/ASV_seqs.fasta" + input_fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/ASV_seqs.fasta" dada_ref_taxonomy = "rdp=18" dada_assign_taxlevels = "K,P,C,O,F,Genus" diff --git a/nextflow.config b/nextflow.config index 5fe65106..978105ce 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,6 +11,8 @@ params { // Input options input = null + input_fasta = null + input_folder = null extension = "/*_R{1,2}_001.fastq.gz" pacbio = false iontorrent = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 5fc118fc..06fdb201 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,8 +15,23 @@ "type": "string", "mimetype": "text/tsv", "fa_icon": "fas fa-dna", - "description": "Either a tab-separated sample sheet, a fasta file, or a folder containing zipped FastQ files", - "help_text": "Points to the main pipeline input, one of the following:\n- folder containing compressed fastq files\n- sample sheet ending with 
`.tsv` that points towards compressed fastq files\n- fasta file ending with `.fasta`, `.fna` or `.fa` that will be taxonomically classified\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` (folder input only) if the sequencing data originates from multiple sequencing runs\n- `--extension` (folder input only) if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1). If the `--sintax_ref_taxonomy` is given, taxonomic assignment is performed using the USEARCH sintax method in addition to DADA2 assignTaxonomy (default: DADA2 assignTaxonomy and 16S rRNA sequence database)\n\n##### Folder containing zipped FastQ files\n\nFor example:\n\n```bash\n--input 'path/to/data'\n```\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. 
All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\n##### Sample sheet\n\nThe sample sheet file is an alternative way to provide input reads, it must be a tab-separated file ending with `.tsv` that must have two to four columns with the following headers: \n- `sampleID` (required): Unique sample identifiers, any unique string (may not contain dots `.`, must not start with a number)\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nFor example:\n\n```bash\n--input 'path/to/samplesheet.tsv'\n```\n\n##### Fasta file\n\nWhen pointing at a file ending with `.fasta`, `.fna` or `.fa`, the containing sequences will be taxonomically classified. All other pipeline steps will be skipped.\n\nThe sequence header line may contain a description, that will be kept as part of the sequence name. 
However, tabs will be changed into spaces.\n\nThe fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nFor example:\n\n```bash\n--input 'path/to/amplicon_sequences.fasta'\n```" + "description": "Path to tab-separated sample sheet", + "help_text": "Path to sample sheet ending with `.tsv` that points towards compressed fastq files\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1). If the `--sintax_ref_taxonomy` is given, taxonomic assignment is performed using the USEARCH sintax method in addition to DADA2 assignTaxonomy (default: DADA2 assignTaxonomy and 16S rRNA sequence database)\n\n\n##### Sample sheet\n\nThe sample sheet file is an alternative way to provide input reads, it must be a tab-separated file ending with `.tsv` that must have two to four columns with the following headers: \n- `sampleID` (required): Unique sample identifiers, any unique string (may not contain dots `.`, must not start with a number)\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nFor example:\n\n```bash\n--input 'path/to/samplesheet.tsv'\n```\n\n", + "schema": "assets/schema_input.json" + }, + "input_fasta": { + "type": "string", + "mimetype": "text/tsv", + "fa_icon": "fas fa-dna", + "description": "Path to ASV/OTU fasta file", + "help_text": "Fasta file ending with `.fasta`, `.fna` or `.fa` that will be taxonomically 
classified\n\nRelated parameters are:\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1). If the `--sintax_ref_taxonomy` is given, taxonomic assignment is performed using the USEARCH sintax method in addition to DADA2 assignTaxonomy (default: DADA2 assignTaxonomy and 16S rRNA sequence database)\n\n##### Fasta file\n\nWhen pointing at a file ending with `.fasta`, `.fna` or `.fa`, the contained sequences will be taxonomically classified. All other pipeline steps will be skipped.\n\nThe sequence header line may contain a description that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nThe fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nFor example:\n\n```bash\n--input_fasta 'path/to/amplicon_sequences.fasta'\n```" + }, + "input_folder": { + "type": "string", + "mimetype": "text/tsv", + "fa_icon": "fas fa-dna", + "description": "Path to folder containing zipped FastQ files", + "help_text": "Path to folder containing compressed fastq files\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1). 
If the `--sintax_ref_taxonomy` is given, taxonomic assignment is performed using the USEARCH sintax method in addition to DADA2 assignTaxonomy (default: DADA2 assignTaxonomy and 16S rRNA sequence database)\n\n##### Folder containing zipped FastQ files\n\nFor example:\n\n```bash\n--input_folder 'path/to/data'\n```\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs additionally requires the parameter `--multiple_sequencing_runs` and a specific folder structure." 
}, "FW_primer": { "type": "string", @@ -50,7 +65,7 @@ "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" } }, - "required": ["input", "outdir"], + "required": ["outdir"], "fa_icon": "fas fa-terminal" }, "sequencing_input": { diff --git a/subworkflows/local/parse_input.nf b/subworkflows/local/parse_input.nf index 33e09978..12521aa8 100644 --- a/subworkflows/local/parse_input.nf +++ b/subworkflows/local/parse_input.nf @@ -1,167 +1,64 @@ -// -// Check input samplesheet or folder and get read channels -// - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def parse_samplesheet(LinkedHashMap row, single_end) { - //Check if samplesheet contains column sampleID & forwardReads - if (row.sampleID == null || row.forwardReads == null) { - error("ERROR: Please check input samplesheet -> Column 'sampleID' and 'forwardReads' are required but not detected.") - } - //Check if samplesheet contains a column for reverse reads - if (row.reverseReads == null && !single_end) { - error("ERROR: Please check input samplesheet -> Column 'reverseReads' is missing. In case you do have only single ended reads, please specify '--single_end', '--pacbio', or '--iontorrent'.") - } - //Check if samplesheet contains a column run and empty fields - if (row.run != null && row.run == "") { - error("ERROR: Please check input samplesheet -> Column 'run' contains an empty field. Either remove column 'run' or fill each field with a value.") - } - //read meta info - def meta = [:] - meta.id = row.sampleID - meta.single_end = single_end.toBoolean() - meta.run = row.run == null ? 
"1" : row.run - //read data info - def array = [] - if (!file(row.forwardReads).exists()) { - error("ERROR: Please check input samplesheet -> Forward read FastQ file does not exist!\n${row.forwardReads}") - } - if (meta.single_end) { - array = [ meta, file(row.forwardReads) ] - } else { - if (!file(row.reverseReads).exists()) { - error("ERROR: Please check input samplesheet -> Reverse read FastQ file does not exist!\n${row.reverseReads}") - } - array = [ meta, [ file(row.forwardReads), file(row.reverseReads) ] ] - } - return array -} - workflow PARSE_INPUT { take: - input // file.tsv or folder - is_fasta_input + input // folder single_end multiple_sequencing_runs extension main: - if ( is_fasta_input ) { - // Fasta input directely for classification - ch_fasta = Channel.fromPath(input, checkIfExists: true) - ch_reads_passed = Channel.empty() - } else { - ch_fasta = Channel.empty() - - if ( input.toString().toLowerCase().endsWith("tsv") ) { - // Sample sheet input + // Folder input - tsvFile = file(input).getName() - // extracts read files from TSV and distribute into channels - Channel - .fromPath(input) - .ifEmpty { error("Cannot find path file ${tsvFile}") } - .splitCsv(header:true, sep:'\t') - .map { parse_samplesheet(it, single_end) } - .set { ch_reads } - } else { - // Folder input - - //Check folders in folder when multiple_sequencing_runs - folders = multiple_sequencing_runs ? "/*" : "" - error_message = "\nCannot find any reads matching: \"${input}${folders}${extension}\"\n" - error_message += "Please revise the input folder (\"--input\"): \"${input}\"\n" - error_message += "and the input file pattern (\"--extension\"): \"${extension}\"\n" - error_message += "*Please note: Path needs to be enclosed in quotes!*\n" - error_message += multiple_sequencing_runs ? 
"If you do not have multiple sequencing runs, please do not use \"--multiple_sequencing_runs\"!\n" : "If you have multiple sequencing runs, please add \"--multiple_sequencing_runs\"!\n" - error_message += "In any case, please consult the pipeline documentation.\n" - if ( single_end ) { - //Get files - single end - Channel - .fromPath( input + folders + extension ) - .ifEmpty { error("${error_message}") } - .map { read -> - def meta = [:] - meta.id = read.baseName.toString().indexOf("_") != -1 ? read.baseName.toString().take(read.baseName.toString().indexOf("_")) : read.baseName - meta.single_end = single_end.toBoolean() - meta.run = multiple_sequencing_runs ? read.take(read.findLastIndexOf{"/"})[-1] : "1" - [ meta, read ] } - .set { ch_reads } - } else { - //Get files - paired end - Channel - .fromFilePairs( input + folders + extension, size: 2 ) - .ifEmpty { error("${error_message}") } - .map { name, reads -> - def meta = [:] - meta.id = name.toString().indexOf("_") != -1 ? name.toString().take(name.toString().indexOf("_")) : name - meta.single_end = single_end.toBoolean() - meta.run = multiple_sequencing_runs ? reads[0].take(reads[0].findLastIndexOf{"/"})[-1] : "1" - [ meta, reads ] } - .set { ch_reads } - } - if (multiple_sequencing_runs) { - //Get folder information - ch_reads - .flatMap { meta, reads -> [ meta.run ] } - .unique() - .set { ch_folders } - //Report folders with sequencing files - ch_folders - .collect() - .subscribe { - String folders = it.toString().replace("[", "").replace("]","") - log.info "\nFound the folder(s) \"$folders\" containing sequencing read files matching \"${extension}\" in \"${input}\".\n" } - //Stop if folder count is 1 and multiple_sequencing_runs - ch_folders - .count() - .subscribe { if ( it == 1 ) error("Found only one folder with read data but \"--multiple_sequencing_runs\" was specified. 
Please review data input.") } - } - } - - //Check whether all sampleID = meta.id are unique - ch_reads - .map { meta, reads -> [ meta.id ] } - .toList() - .subscribe { - if( it.size() != it.unique().size() ) { - ids = it.take(10); - error("Please review data input, sample IDs are not unique! First IDs are $ids") - } - } - - //Check that no dots "." are in sampleID - ch_reads - .map { meta, reads -> meta.id } - .subscribe { if ( "$it".contains(".") ) error("Please review data input, sampleIDs may not contain dots, but \"$it\" does.") } - - //Check that sampleIDs do not start with a number when using metadata (sampleID gets X prepended by R and metadata wont match any more!) + //Check folders in folder when multiple_sequencing_runs + folders = multiple_sequencing_runs ? "/*" : "" + error_message = "\nCannot find any reads matching: \"${input}${folders}${extension}\"\n" + error_message += "Please revise the input folder (\"--input\"): \"${input}\"\n" + error_message += "and the input file pattern (\"--extension\"): \"${extension}\"\n" + error_message += "*Please note: Path needs to be enclosed in quotes!*\n" + error_message += multiple_sequencing_runs ? "If you do not have multiple sequencing runs, please do not use \"--multiple_sequencing_runs\"!\n" : "If you have multiple sequencing runs, please add \"--multiple_sequencing_runs\"!\n" + error_message += "In any case, please consult the pipeline documentation.\n" + if ( single_end ) { + //Get files - single end + Channel + .fromPath( input + folders + extension ) + .ifEmpty { error("${error_message}") } + .map { read -> + def meta = [:] + meta.id = read.baseName.toString().indexOf("_") != -1 ? read.baseName.toString().take(read.baseName.toString().indexOf("_")) : read.baseName + meta.single_end = single_end.toBoolean() + meta.run = multiple_sequencing_runs ? 
read.take(read.findLastIndexOf{"/"})[-1] : "1" + [ meta, read ] } + .set { ch_reads } + } else { + //Get files - paired end + Channel + .fromFilePairs( input + folders + extension, size: 2 ) + .ifEmpty { error("${error_message}") } + .map { name, reads -> + def meta = [:] + meta.id = name.toString().indexOf("_") != -1 ? name.toString().take(name.toString().indexOf("_")) : name + meta.single_end = single_end.toBoolean() + meta.run = multiple_sequencing_runs ? reads[0].take(reads[0].findLastIndexOf{"/"})[-1] : "1" + [ meta, reads ] } + .set { ch_reads } + } + if (multiple_sequencing_runs) { + //Get folder information ch_reads - .map { meta, reads -> meta.id } - .subscribe { if ( params.metadata && "$it"[0].isNumber() ) error("Please review data input, sampleIDs may not start with a number, but \"$it\" does. The pipeline unintentionally modifies such strings and the metadata will not match any more.") } - - //Filter empty files - ch_reads.dump(tag:'parse_input.nf: ch_reads') - .branch { - failed: it[0].single_end ? it[1].countFastq() < params.min_read_counts : it[1][0].countFastq() < params.min_read_counts || it[1][1].countFastq() < params.min_read_counts - passed: true - } - .set { ch_reads_result } - ch_reads_result.passed.set { ch_reads_passed } - ch_reads_result.failed - .map { meta, reads -> [ meta.id ] } + .flatMap { meta, reads -> [ meta.run ] } + .unique() + .set { ch_folders } + //Report folders with sequencing files + ch_folders .collect() .subscribe { - samples = it.join("\n") - if (params.ignore_empty_input_files) { - log.warn "At least one input file for the following sample(s) had too few reads (<$params.min_read_counts):\n$samples\nThe threshold can be adjusted with `--min_read_counts`. 
Ignoring failed samples and continue!\n" - } else { - error("At least one input file for the following sample(s) had too few reads (<$params.min_read_counts):\n$samples\nEither remove those samples, adjust the threshold with `--min_read_counts`, or ignore that samples using `--ignore_empty_input_files`.") - } - } + String folders = it.toString().replace("[", "").replace("]","") + log.info "\nFound the folder(s) \"$folders\" containing sequencing read files matching \"${extension}\" in \"${input}\".\n" } + //Stop if folder count is 1 and multiple_sequencing_runs + ch_folders + .count() + .subscribe { if ( it == 1 ) error("Found only one folder with read data but \"--multiple_sequencing_runs\" was specified. Please review data input.") } } emit: - reads = ch_reads_passed - fasta = ch_fasta + reads = ch_reads } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 4d7d1194..3942c9e9 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' @@ -74,8 +74,10 @@ if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { // Set non-params Variables -String[] fasta_extensions = [".fasta", ".fna", ".fa"] // this is the alternative ASV fasta input -is_fasta_input = WorkflowAmpliseq.checkIfFileHasExtension( params.input.toString().toLowerCase(), fasta_extensions ) +// TODO: remove all that following +// String[] fasta_extensions = [".fasta", ".fna", ".fa"] // this is the alternative ASV fasta input +// is_fasta_input = WorkflowAmpliseq.checkIfFileHasExtension( params.input.toString().toLowerCase(), fasta_extensions ) +is_fasta_input = params.input_fasta ? 
true : false single_end = params.single_end if (params.pacbio || params.iontorrent) { @@ -212,11 +214,45 @@ workflow AMPLISEQ { // // Create a channel for input read files // - PARSE_INPUT ( params.input, is_fasta_input, single_end, params.multiple_sequencing_runs, params.extension ) - ch_reads = PARSE_INPUT.out.reads - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! There is currently no tooling to help you write a sample sheet schema + if ( params.input ) { + // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + ch_reads = Channel.fromSamplesheet("input") + .map{ meta, readfw, readrv -> + meta.single_end = single_end.toBoolean() + def reads = readfw && readrv ? [readfw,readrv] : readfw + return [meta, reads] } + ch_fasta = Channel.empty() + } else if ( params.input_fasta ) { + ch_fasta = Channel.fromPath(params.input_fasta, checkIfExists: true) + ch_reads = Channel.empty() + } else if ( params.input_folder ) { + PARSE_INPUT ( params.input_folder, single_end, params.multiple_sequencing_runs, params.extension ) + ch_reads = PARSE_INPUT.out.reads + ch_fasta = Channel.empty() + } else { + error "One of --input, --input_fasta, --input_folder must be provided!" + } + + //Filter empty files + ch_reads.dump(tag:'parse_input.nf: ch_reads') + .branch { + failed: it[0].single_end ? 
it[1].countFastq() < params.min_read_counts : it[1][0].countFastq() < params.min_read_counts || it[1][1].countFastq() < params.min_read_counts + passed: true + } + .set { ch_reads_result } + ch_reads_result.passed.set { ch_reads } + ch_reads_result.failed + .map { meta, reads -> [ meta.id ] } + .collect() + .subscribe { + samples = it.join("\n") + if (params.ignore_empty_input_files) { + log.warn "At least one input file for the following sample(s) had too few reads (<$params.min_read_counts):\n$samples\nThe threshold can be adjusted with `--min_read_counts`. Ignoring failed samples and continue!\n" + } else { + error("At least one input file for the following sample(s) had too few reads (<$params.min_read_counts):\n$samples\nEither remove those samples, adjust the threshold with `--min_read_counts`, or ignore that samples using `--ignore_empty_input_files`.") + } + } + ch_reads.dump(tag: 'ch_reads') // // MODULE: Rename files @@ -322,7 +358,7 @@ workflow AMPLISEQ { // Modules : Filter rRNA // if ( is_fasta_input ) { - FORMAT_FASTAINPUT( PARSE_INPUT.out.fasta ) + FORMAT_FASTAINPUT( ch_fasta ) ch_unfiltered_fasta = FORMAT_FASTAINPUT.out.fasta } else { ch_unfiltered_fasta = DADA2_MERGE.out.fasta @@ -663,10 +699,13 @@ workflow AMPLISEQ { } //Save input in results folder - input = file(params.input) - if ( is_fasta_input || input.toString().toLowerCase().endsWith("tsv") ) { + if ( params.input ) { + file("${params.outdir}/input").mkdir() + file("${params.input}").copyTo("${params.outdir}/input") + } + if ( params.input_fasta ) { file("${params.outdir}/input").mkdir() - input.copyTo("${params.outdir}/input") + file("${params.input_fasta}").copyTo("${params.outdir}/input") } //Save metadata in results folder if ( params.metadata ) { From 01212b6da54f3101efe9c1fc0eef97b00351e144 Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Wed, 9 Aug 2023 09:23:50 +0800 Subject: [PATCH 113/230] added phyloseq workflow --- modules/local/phyloseq.nf | 6 +- 
subworkflows/local/phyloseq_workflow.nf | 75 +++++++++++++++++++++++++ tests/pipeline/iontorrent.nf.test | 3 +- tests/pipeline/multi.nf.test | 3 +- tests/pipeline/pacbio_its.nf.test | 3 +- tests/pipeline/pplace.nf.test | 4 +- tests/pipeline/reftaxcustom.nf.test | 3 +- tests/pipeline/single.nf.test | 3 +- tests/pipeline/sintax.nf.test | 3 +- tests/pipeline/test.nf.test | 4 +- workflows/ampliseq.nf | 73 +++++++----------------- 11 files changed, 109 insertions(+), 71 deletions(-) create mode 100644 subworkflows/local/phyloseq_workflow.nf diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index 3df9c6db..4ede387d 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -54,6 +54,10 @@ process PHYLOSEQ { saveRDS(phy_obj, file = paste0($prefix, "_phyloseq.rds")) # Version information - writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),paste0(" phyloseq: ", packageVersion("phyloseq"))), "versions.yml") + writeLines(c("\\"${task.process}\\":", + paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")), + paste0(" phyloseq: ", packageVersion("phyloseq"))), + "versions.yml" + ) """ } \ No newline at end of file diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf new file mode 100644 index 00000000..406f7756 --- /dev/null +++ b/subworkflows/local/phyloseq_workflow.nf @@ -0,0 +1,75 @@ +/* + * Create phyloseq objects + */ + +include { PHYLOSEQ } from '../../modules/local/phyloseq' +include { PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../../modules/local/phyloseq_intax' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../../modules/local/phyloseq_intax' + +workflow PHYLOSEQ_WORKFLOW { + take: + ch_dada2_tax + ch_sintax_tax + ch_pplace_tax + ch_qiime2_tax + ch_tsv + ch_meta + ch_tree + run_qiime2 + + main: + if ( params.metadata ) { + 
ch_phyloseq_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel + } else { + ch_phyloseq_inmeta = [] + } + + ch_phyloseq_intax = Channel.empty() + if ( !params.skip_dada_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_dada2_tax.map { it = [ "dada2", file(it) ] } + ) + } + + if ( params.sintax_ref_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_sintax_tax.map { it = [ "sintax", file(it) ] } + ) + } + + if ( params.pplace_tree ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_PPLACE ( + ch_pplace_tax + ).tsv.map { it = [ "pplace", file(it) ] } + ) + + ch_phyloseq_intree = ch_tree.map { it = it[1] }.first() + } else { + ch_phyloseq_intree = [] + } + + if ( run_qiime2 ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_QIIME2 ( + ch_qiime2_tax + ).tsv.map { it = [ "qiime2", file(it) ] } + ) + + if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { + ch_phyloseq_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv + + } else { + ch_phyloseq_inasv = ch_tsv + } + } else { + ch_phyloseq_inasv = ch_tsv + } + + PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + + emit: + rds = PHYLOSEQ.out.rds + versions= PHYLOSEQ.out.versions +} diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index a4a16631..9b73af86 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,8 +38,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") 
} ) } } diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index 75e2e374..e4fe28a0 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -63,8 +63,7 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() }, { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), - path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index 144db928..39e1d2a2 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -52,8 +52,7 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 9c4b1806..b78c479b 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -55,9 +55,7 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() }, { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new 
File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 19035ccb..42e0d104 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -43,8 +43,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index 1aa634a0..be236c9a 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -44,8 +44,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index fb0c8c15..f6de2995 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -65,8 +65,7 @@ nextflow_pipeline { { assert new File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() }, { assert new 
File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index e8ba0ce0..7b295941 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -93,9 +93,7 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } ) } } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index a0b0a9f7..6608b86d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -165,10 +165,6 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax' include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' -include { PHYLOSEQ } from '../modules/local/phyloseq' -include { PHYLOSEQ_INASV } from '../modules/local/phyloseq_inasv' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -185,6 +181,7 @@ include { QIIME2_EXPORT } from '../subworkflows/local/qiime2_exp include { QIIME2_BARPLOTAVG } from '../subworkflows/local/qiime2_barplotavg' include { 
QIIME2_DIVERSITY } from '../subworkflows/local/qiime2_diversity' include { QIIME2_ANCOM } from '../subworkflows/local/qiime2_ancom' +include { PHYLOSEQ_WORKFLOW } from '../subworkflows/local/phyloseq_workflow' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -480,6 +477,9 @@ workflow AMPLISEQ { ch_qiime_classifier ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here to a warning + ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv + } else { + ch_qiime2_tax = Channel.empty() } // @@ -543,7 +543,7 @@ workflow AMPLISEQ { } //Export various ASV tables if (!params.skip_abundance_tables) { - QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, QIIME2_TAXONOMY.out.tsv, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max ) + QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, ch_qiime2_tax, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max ) } if (!params.skip_barplot) { @@ -600,6 +600,8 @@ workflow AMPLISEQ { tax_agglom_max ) } + } else { + ch_tsv = ch_dada2_asv } // @@ -631,59 +633,26 @@ workflow AMPLISEQ { } // - // MODULE: Create phyloseq objects + // SUBWORKFLOW: Create phyloseq objects // if ( !params.skip_taxonomy ) { - if ( params.metadata ) { - ch_phyloseq_inmeta = ch_metadata.first() // The .first() is to make sure it's a value channel - } else { - ch_phyloseq_inmeta = [] - } - - ch_phyloseq_intax = Channel.empty() - if ( !params.skip_dada_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_dada2_tax.map { it = [ "dada2", file(it) ] } - ) - } - - if ( params.sintax_ref_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_sintax_tax.map { it = [ "sintax", file(it) ] } - ) - } - if ( params.pplace_tree ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_PPLACE ( - ch_pplace_tax - ).tsv.map { it = [ "pplace", file(it) ] } - ) - - ch_phyloseq_intree = 
FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny.map { it = it[1] }.first() + ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny } else { - ch_phyloseq_intree = [] + ch_tree_for_phyloseq = [] } - - if ( run_qiime2 ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_QIIME2 ( - QIIME2_TAXONOMY.out.tsv - ).tsv.map { it = [ "qiime2", file(it) ] } - ) - - if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { - ch_phyloseq_inasv = PHYLOSEQ_INASV ( QIIME2_FILTERTAXA.out.tsv ).tsv - } else { - ch_phyloseq_inasv = ch_dada2_asv - } - } else { - ch_phyloseq_inasv = ch_dada2_asv - } - - PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) - ch_versions = ch_versions.mix(PHYLOSEQ.out.versions.first()) + PHYLOSEQ_WORKFLOW ( + ch_dada2_tax.ifEmpty([]), + ch_sintax_tax.ifEmpty([]), + ch_pplace_tax.ifEmpty([]), + ch_qiime2_tax.ifEmpty([]), + ch_tsv, + ch_metadata.ifEmpty([]), + ch_tree_for_phyloseq, + run_qiime2 + ) + ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first()) } CUSTOM_DUMPSOFTWAREVERSIONS ( From 29c47b6219863fba1516285133895bac3e83a7e3 Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Thu, 10 Aug 2023 14:54:26 +0800 Subject: [PATCH 114/230] modified test files for phyloseq and fixed a bug with phyloseq workflow that happens in some test profiles --- bin/reformat_tax_for_phyloseq.py | 6 +-- modules/local/phyloseq.nf | 14 +++---- subworkflows/local/phyloseq_workflow.nf | 49 +++++------------------- tests/pipeline/iontorrent.nf.test | 3 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test | 3 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test | 3 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test | 4 +- tests/pipeline/pplace.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test | 3 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/single.nf.test | 4 +- 
tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test | 3 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test | 4 +- tests/pipeline/test.nf.test.snap | 2 +- workflows/ampliseq.nf | 16 +++++--- 20 files changed, 57 insertions(+), 71 deletions(-) diff --git a/bin/reformat_tax_for_phyloseq.py b/bin/reformat_tax_for_phyloseq.py index 9a3281fb..f35aaf03 100755 --- a/bin/reformat_tax_for_phyloseq.py +++ b/bin/reformat_tax_for_phyloseq.py @@ -13,10 +13,10 @@ tax_col = tax_df.columns[1] # Split the values in the tax column -split_tax = tax_df[tax_col].str.split(';', expand=True) +split_tax = tax_df[tax_col].str.split(";", expand=True) # Assign names to the new columns with an auto incrementing integer -new_col_names = [f'{tax_col}_{i+1}' for i in range(split_tax.shape[1])] +new_col_names = [f"{tax_col}_{i+1}" for i in range(split_tax.shape[1])] split_tax.columns = new_col_names # Strip whitespace from the tax names @@ -29,4 +29,4 @@ result = pd.concat([tax_df, split_tax], axis=1) # Create new tsv file -result.to_csv(out_file, sep='\t', index=False) +result.to_csv(out_file, sep="\t", index=False) diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index 4ede387d..54537213 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -1,7 +1,7 @@ process PHYLOSEQ { tag "$prefix" label 'process_low' - + conda "bioconda::bioconductor-phyloseq=1.44.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' : @@ -9,10 +9,10 @@ process PHYLOSEQ { input: tuple val(prefix), path(tax_tsv) - path otu_tsv + path otu_tsv path sam_tsv path tree - + output: tuple val(prefix), path("*phyloseq.rds"), emit: rds path "versions.yml" , emit: versions @@ -52,12 +52,12 @@ process PHYLOSEQ { } saveRDS(phy_obj, file = paste0($prefix, "_phyloseq.rds")) - + # Version information - writeLines(c("\\"${task.process}\\":", + writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")), - paste0(" phyloseq: ", packageVersion("phyloseq"))), + paste0(" phyloseq: ", packageVersion("phyloseq"))), "versions.yml" ) """ -} \ No newline at end of file +} diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index 406f7756..adf208b7 100644 --- a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -4,70 +4,39 @@ include { PHYLOSEQ } from '../../modules/local/phyloseq' include { PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../../modules/local/phyloseq_intax' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../../modules/local/phyloseq_intax' workflow PHYLOSEQ_WORKFLOW { take: - ch_dada2_tax - ch_sintax_tax - ch_pplace_tax - ch_qiime2_tax + ch_tax ch_tsv ch_meta ch_tree run_qiime2 - + main: if ( params.metadata ) { ch_phyloseq_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel - } else { - ch_phyloseq_inmeta = [] - } - - ch_phyloseq_intax = Channel.empty() - if ( !params.skip_dada_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_dada2_tax.map { it = [ "dada2", file(it) ] } - ) - } - - if ( params.sintax_ref_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_sintax_tax.map { it = [ "sintax", file(it) ] } - ) + } else { + ch_phyloseq_inmeta = [] } if ( 
params.pplace_tree ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_PPLACE ( - ch_pplace_tax - ).tsv.map { it = [ "pplace", file(it) ] } - ) - ch_phyloseq_intree = ch_tree.map { it = it[1] }.first() } else { ch_phyloseq_intree = [] } - - if ( run_qiime2 ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_QIIME2 ( - ch_qiime2_tax - ).tsv.map { it = [ "qiime2", file(it) ] } - ) + if ( run_qiime2 ) { if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { ch_phyloseq_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv - - } else { - ch_phyloseq_inasv = ch_tsv + } else { + ch_phyloseq_inasv = ch_tsv } - } else { + } else { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index 9b73af86..a4a16631 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,7 +38,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index c9c8f4bb..c7fbfb89 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, 
CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index e4fe28a0..75e2e374 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -63,7 +63,8 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() }, { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), - path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") } + path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 2f0095ac..25b1437c 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, 
dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index 39e1d2a2..144db928 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -52,7 +52,8 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index 3c860a89..775e5195 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, 
VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index b78c479b..956e88b3 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -55,7 +55,9 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() }, { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index d0aa5f26..9ee79d29 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, 
DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 42e0d104..19035ccb 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -43,7 +43,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), 
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 6407a3bf..7b33f261 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index be236c9a..08ddeca2 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -44,7 +44,9 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - 
path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 49d65106..bd9096d0 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index f6de2995..fb0c8c15 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -65,7 +65,8 @@ nextflow_pipeline { { assert new File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() }, { assert new File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + 
path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index c9745541..5f360a4b 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index 7b295941..e8ba0ce0 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -93,7 +93,9 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() 
}, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index fdf84093..b345de55 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3ef61dc1..04255de8 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -123,6 +123,9 @@ if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) if ( workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) { log.warn "Conda or mamba is enabled, 
any steps involving QIIME2 are not available. Use a container engine instead of conda to enable all software." } } +// This tracks tax tables produced during pipeline and each table will be used during phyloseq +ch_tax_for_phyloseq = Channel.empty() + /* ======================================================================================== @@ -163,6 +166,8 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax' include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -424,6 +429,7 @@ workflow AMPLISEQ { taxlevels ).tax.set { ch_dada2_tax } ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } ) } else { ch_dada2_tax = Channel.empty() } @@ -438,6 +444,7 @@ workflow AMPLISEQ { sintax_taxlevels ).tax.set { ch_sintax_tax } ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } ) } else { ch_sintax_tax = Channel.empty() } @@ -459,6 +466,7 @@ workflow AMPLISEQ { FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) } else { ch_pplace_tax = Channel.empty() } @@ -479,6 +487,7 @@ workflow AMPLISEQ { ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is 
here, dont know why this leads here to a warning ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) } else { ch_qiime2_tax = Channel.empty() } @@ -643,11 +652,8 @@ workflow AMPLISEQ { ch_tree_for_phyloseq = [] } - PHYLOSEQ_WORKFLOW ( - ch_dada2_tax.ifEmpty([]), - ch_sintax_tax.ifEmpty([]), - ch_pplace_tax.ifEmpty([]), - ch_qiime2_tax.ifEmpty([]), + PHYLOSEQ_WORKFLOW ( + ch_tax_for_phyloseq, ch_tsv, ch_metadata.ifEmpty([]), ch_tree_for_phyloseq, From 4baba8147f6ac30d39b5879fa4bd8fb19c3689b7 Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Thu, 10 Aug 2023 16:19:53 +0800 Subject: [PATCH 115/230] fixed file checks --- tests/pipeline/pplace.nf.test | 3 +-- tests/pipeline/single.nf.test | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 956e88b3..d348bee8 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -56,8 +56,7 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index 08ddeca2..1aa634a0 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -45,8 +45,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), 
path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } From bfe5bb1254f4015401c5d60f9a16f29ded90dd58 Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Thu, 10 Aug 2023 15:14:05 +0200 Subject: [PATCH 116/230] Update assets/report_template.Rmd Co-authored-by: Till E. <64961761+tillenglert@users.noreply.github.com> --- assets/report_template.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 2b7aed3d..e31eb6be 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -584,7 +584,7 @@ cat("# Filtering of ASVs\n") ```{r, eval = !isFALSE(params$path_barrnap_sum), results='asis'} cat("## rRNA detection\n") -cat("[Barrnap](https://github.com/tseemann/barrnap) classifies the ASVs into the origin domain (including mitochondiral origin).\n\n", sep = "") +cat("[Barrnap](https://github.com/tseemann/barrnap) classifies the ASVs into the origin domain (including mitochondrial origin).\n\n", sep = "") # Read the barrnap files and count the lines barrnap_sum = read.table( params$path_barrnap_sum, header = TRUE, sep = "\t", stringsAsFactors = FALSE) From 1a3ec4a74b30add3877a75fa846213c3f0e7fa90 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 10 Aug 2023 15:22:15 +0200 Subject: [PATCH 117/230] set limits for barrnap plot --- assets/report_template.Rmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index e31eb6be..a14cdc6c 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -625,7 +625,8 @@ plot_barrnap_df_sum <- ggplot(barrnap_df_sum, ylab("% Classification") + xlab("rRNA origins") + 
coord_flip() + - theme_bw() + theme_bw() + + ylim(0, 100) plot_barrnap_df_sum svg("rrna_detection_with_barrnap.svg") @@ -638,7 +639,7 @@ cat("\n\nrRNA classification results can be found in folder [barrnap](../barrnap ```{r, eval = !isFALSE(params$path_barrnap_sum) && !isFALSE(params$filter_ssu), results='asis'} -cat("\n\nASVs were filtered for `",params$filter_ssu,"` (`bac`: bacteria, `arc`: archaea, `mito`: metazoan mitochondria, `euk`: eukaryotes) +cat("\n\nASVs were filtered for `",params$filter_ssu,"` (`bac`: Bacteria, `arc`: Archaea, `mito`: Mitochondria, `euk`: Eukaryotes) using the above classification. The following table shows read counts for each sample before and after filtering:\n\n", sep = "") # Read the barrnap stats file From d47e874335c64ce8b2c6b7e4ab1dbf7e5e7aa7a9 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 11 Aug 2023 14:58:00 +0200 Subject: [PATCH 118/230] rm is_fasta_input --- CHANGELOG.md | 8 +++++- assets/schema_input.json | 3 ++- subworkflows/local/parse_input.nf | 21 ++++++++++++++++ workflows/ampliseq.nf | 41 ++++++++++++++----------------- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdbecec5..e9e8ee1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [#TODO](https://github.com/nf-core/ampliseq/pull/TODO) - `--input` was split into three params: (1) `--input` for samplesheet, (2) `--input_fasta` for ASV/OTU fasta input, (3) `--input_folder` direct FASTQ input +- [#616](https://github.com/nf-core/ampliseq/pull/616) - `--input` was split into three params: (1) `--input` for samplesheet, (2) `--input_fasta` for ASV/OTU fasta input, (3) `--input_folder` direct FASTQ input + +| Param updated | Param old | Accepts | +| ------------- | --------- | ---------------------------------------- | +| input | input | samplesheet, .tsv | +| input_fasta | input | ASV/OTU sequences, .fasta 
| +| input_folder | input | Folder containing compressed fastq files | ### `Fixed` diff --git a/assets/schema_input.json b/assets/schema_input.json index 8ff6422a..8a016da6 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,7 +10,8 @@ "sampleID": { "type": "string", "pattern": "^[a-zA-Z][a-zA-Z0-9_]+$", - "errorMessage": "Sample name must be provided, must start with a letter, and can only contain letters, numbers or underscores; Regex: '^[a-zA-Z][a-zA-Z0-9_]+$'", + "unique": true, + "errorMessage": "Unique sample ID must be provided: Must start with a letter, and can only contain letters, numbers or underscores; Regex: '^[a-zA-Z][a-zA-Z0-9_]+$'", "meta": ["id"] }, "forwardReads": { diff --git a/subworkflows/local/parse_input.nf b/subworkflows/local/parse_input.nf index 12521aa8..ba8aa484 100644 --- a/subworkflows/local/parse_input.nf +++ b/subworkflows/local/parse_input.nf @@ -59,6 +59,27 @@ workflow PARSE_INPUT { .subscribe { if ( it == 1 ) error("Found only one folder with read data but \"--multiple_sequencing_runs\" was specified. Please review data input.") } } + //Check whether all sampleID = meta.id are unique + ch_reads + .map { meta, reads -> [ meta.id ] } + .toList() + .subscribe { + if( it.size() != it.unique().size() ) { + ids = it.take(10); + error("Please review data input, sample IDs are not unique! First IDs are $ids") + } + } + + //Check that no dots "." are in sampleID + ch_reads + .map { meta, reads -> meta.id } + .subscribe { if ( "$it".contains(".") ) error("Please review data input, sampleIDs may not contain dots, but \"$it\" does.") } + + //Check that sampleIDs do not start with a number when using metadata (sampleID gets X prepended by R and metadata won't match any more!) + ch_reads + .map { meta, reads -> meta.id } + .subscribe { if ( params.metadata && "$it"[0].isNumber() ) error("Please review data input, sampleIDs may not start with a number, but \"$it\" does. 
The pipeline unintentionally modifies such strings and the metadata will not match any more.") } + emit: reads = ch_reads } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index eaf682b7..258b1a75 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -79,11 +79,6 @@ ch_report_abstract = params.report_abstract ? Channel.fromPath(params.report_abs // Set non-params Variables -// TODO: remove all that following -// String[] fasta_extensions = [".fasta", ".fna", ".fa"] // this is the alternative ASV fasta input -// is_fasta_input = WorkflowAmpliseq.checkIfFileHasExtension( params.input.toString().toLowerCase(), fasta_extensions ) -is_fasta_input = params.input_fasta ? true : false - single_end = params.single_end if (params.pacbio || params.iontorrent) { single_end = true @@ -91,12 +86,12 @@ if (params.pacbio || params.iontorrent) { trunclenf = params.trunclenf ?: 0 trunclenr = params.trunclenr ?: 0 -if ( !single_end && !params.illumina_pe_its && (params.trunclenf == null || params.trunclenr == null) && !is_fasta_input ) { +if ( !single_end && !params.illumina_pe_its && (params.trunclenf == null || params.trunclenr == null) && !params.input_fasta ) { find_truncation_values = true log.warn "No DADA2 cutoffs were specified (`--trunclenf` & `--trunclenr`), therefore reads will be truncated where median quality drops below ${params.trunc_qmin} (defined by `--trunc_qmin`) but at least a fraction of ${params.trunc_rmin} (defined by `--trunc_rmin`) of the reads will be retained.\nThe chosen cutoffs do not account for required overlap for merging, therefore DADA2 might have poor merging efficiency or even fail.\n" } else { find_truncation_values = false } -if ( !is_fasta_input && (!params.FW_primer || !params.RV_primer) && !params.skip_cutadapt ) { +if ( !params.input_fasta && (!params.FW_primer || !params.RV_primer) && !params.skip_cutadapt ) { error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for primer trimming. 
If primer trimming is not needed, use `--skip_cutadapt`.") } @@ -224,29 +219,28 @@ workflow AMPLISEQ { ch_versions = Channel.empty() // - // Create a channel for input read files + // Create input channels // + ch_input_fasta = Channel.empty() + ch_input_reads = Channel.empty() if ( params.input ) { // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - ch_reads = Channel.fromSamplesheet("input") + ch_input_reads = Channel.fromSamplesheet("input") .map{ meta, readfw, readrv -> meta.single_end = single_end.toBoolean() - def reads = readfw && readrv ? [readfw,readrv] : readfw + def reads = single_end ? readfw : [readfw,readrv] return [meta, reads] } - ch_fasta = Channel.empty() } else if ( params.input_fasta ) { - ch_fasta = Channel.fromPath(params.input_fasta, checkIfExists: true) - ch_reads = Channel.empty() + ch_input_fasta = Channel.fromPath(params.input_fasta, checkIfExists: true) } else if ( params.input_folder ) { PARSE_INPUT ( params.input_folder, single_end, params.multiple_sequencing_runs, params.extension ) - ch_reads = PARSE_INPUT.out.reads - ch_fasta = Channel.empty() + ch_input_reads = PARSE_INPUT.out.reads } else { error "One of --input, --input_fasta, --input_folder must be provided!" } //Filter empty files - ch_reads.dump(tag:'parse_input.nf: ch_reads') + ch_input_reads.dump(tag:'ch_input_reads') .branch { failed: it[0].single_end ? 
it[1].countFastq() < params.min_read_counts : it[1][0].countFastq() < params.min_read_counts || it[1][1].countFastq() < params.min_read_counts passed: true @@ -369,8 +363,8 @@ workflow AMPLISEQ { // // Modules : Filter rRNA // - if ( is_fasta_input ) { - FORMAT_FASTAINPUT( ch_fasta ) + if ( params.input_fasta ) { + FORMAT_FASTAINPUT( ch_input_fasta ) ch_unfiltered_fasta = FORMAT_FASTAINPUT.out.fasta } else { ch_unfiltered_fasta = DADA2_MERGE.out.fasta @@ -710,6 +704,9 @@ workflow AMPLISEQ { ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first()) } + // + // MODULE: Software versions + // CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) @@ -754,9 +751,9 @@ workflow AMPLISEQ { ch_report_logo, ch_report_abstract, ch_metadata.ifEmpty( [] ), - params.input.toString().toLowerCase().endsWith("tsv") ? file(params.input) : [], // samplesheet input - is_fasta_input ? PARSE_INPUT.out.fasta.ifEmpty( [] ) : [], // fasta input - !is_fasta_input && !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") + params.input ? file(params.input) : [], // samplesheet input + ch_fasta.ifEmpty( [] ), // fasta input + !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? 
CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, DADA2_PREPROCESSING.out.args.first().ifEmpty( [] ), @@ -777,7 +774,7 @@ workflow AMPLISEQ { [ meta, svgs.flatten() ] }.ifEmpty( [[],[]] ), DADA2_MERGE.out.asv.ifEmpty( [] ), - ch_unfiltered_fasta.ifEmpty( [] ), // this is identical to DADA2_MERGE.out.fasta if !is_fasta_input + ch_unfiltered_fasta.ifEmpty( [] ), // this is identical to DADA2_MERGE.out.fasta if !params.input_fasta DADA2_MERGE.out.dada2asv.ifEmpty( [] ), DADA2_MERGE.out.dada2stats.ifEmpty( [] ), !params.skip_barrnap ? BARRNAPSUMMARY.out.summary.ifEmpty( [] ) : [], From ad4790a4463327360ea016935963584917d553ab Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 11 Aug 2023 15:50:44 +0200 Subject: [PATCH 119/230] fix report ch_fasta --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 258b1a75..2a8c1f05 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -752,7 +752,7 @@ workflow AMPLISEQ { ch_report_abstract, ch_metadata.ifEmpty( [] ), params.input ? file(params.input) : [], // samplesheet input - ch_fasta.ifEmpty( [] ), // fasta input + ch_input_fasta.ifEmpty( [] ), // fasta input !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? 
CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, From c03e1d30139bed39aaa134341d1f0278ae4b7440 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 11 Aug 2023 15:52:48 +0200 Subject: [PATCH 120/230] update documentation --- docs/output.md | 4 +- docs/usage.md | 125 ++++++++++++++++++++++--------------------- nextflow_schema.json | 6 +-- 3 files changed, 68 insertions(+), 67 deletions(-) diff --git a/docs/output.md b/docs/output.md index 9e9eb75a..58e68998 100644 --- a/docs/output.md +++ b/docs/output.md @@ -55,9 +55,9 @@ Samplesheet, ASV fasta, and metadata file are copied into the results folder. Output files - `input/` - - `*.tsv`: Samplesheet input if specified with `--input`. + - `*`: Samplesheet input if specified with `--input`. - `*.tsv`: Metadata input if specified with `--metadata`. - - `*.fasta|.fna|.fa`: ASV sequence input if specified with `--input`. + - `*`: ASV sequence input if specified with `--input_fasta`. diff --git a/docs/usage.md b/docs/usage.md index 59fe9f64..b1fecaa8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,9 +10,9 @@ - [Quick start](#quick-start) - [Setting parameters in a file](#setting-parameters-in-a-file) - [Input specifications](#input-specifications) - - [Direct FASTQ input](#direct-fastq-input) - [Samplesheet input](#samplesheet-input) - [ASV/OTU fasta input](#asvotu-fasta-input) + - [Direct FASTQ input](#direct-fastq-input) - [Metadata](#metadata) - [Updating the pipeline](#updating-the-pipeline) - [Reproducibility](#reproducibility) @@ -35,16 +35,16 @@ The typical command for running the pipeline is as follows: ```bash nextflow run nf-core/ampliseq \ - -r 2.3.2 \ + -r 2.6.1 \ -profile singularity \ - --input "data" \ + --input "samplesheet.tsv" \ --FW_primer GTGYCAGCMGCCGCGGTAA \ --RV_primer GGACTACNVGGGTWTCTAAT \ --metadata "data/Metadata.tsv" --outdir "./results" ``` -In this example, `--input` is the [Direct FASTQ input](#direct-fastq-input), other options are [Samplesheet 
input](#samplesheet-input) and [ASV/OTU fasta input](#asvotu-fasta-input). For more details on metadata, see [Metadata](#metadata). For [Reproducibility](#reproducibility), specify the version to run using `-r` (= release, e.g. 2.6.1). See the [nf-core/ampliseq website documentation](https://nf-co.re/ampliseq/parameters) for more information about pipeline specific parameters. +In this example, `--input` is the [Samplesheet input](#samplesheet-input), other options are [Direct FASTQ input](#direct-fastq-input) and [ASV/OTU fasta input](#asvotu-fasta-input). For more details on metadata, see [Metadata](#metadata). For [Reproducibility](#reproducibility), specify the version to run using `-r` (= release, e.g. 2.6.1, please use the most recent release). See the [nf-core/ampliseq website documentation](https://nf-co.re/ampliseq/parameters) for more information about pipeline specific parameters. It is possible to not provide primer sequences (`--FW_primer` & `--RV_primer`) and skip primer trimming using `--skip_cutadapt`, but this is only for data that indeed does not contain any PCR primers in their sequences. Also, metadata (`--metadata`) isnt required, but aids downstream analysis. @@ -82,7 +82,7 @@ nextflow run nf-core/ampliseq -profile docker -params-file params.yaml with `params.yaml` containing: ```yaml -input: "data" +input: "samplesheet.tsv" FW_primer: "GTGYCAGCMGCCGCGGTAA" RV_primer: "GGACTACNVGGGTWTCTAAT" metadata: "data/Metadata.tsv" @@ -94,16 +94,71 @@ You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-c ### Input specifications -The input data can be passed to nf-core/ampliseq in three possible ways using the `--input` parameter, either a folder containing zipped FastQ files, a tab-separated samplesheet, or a fasta file to be taxonomically classified. +The input data can be passed to nf-core/ampliseq in three possible ways using the parameters `--input`, `--input_fasta`, or `--input_folder`. 
+The three parameters and input types are mutually exclusive. + +- [Samplesheet input](#samplesheet-input) using `--input`: Tab-separated samplesheet +- [ASV/OTU fasta input](#asvotu-fasta-input) using `--input_fasta`: Fasta file with sequences to be taxonomically classified +- [Direct FASTQ input](#direct-fastq-input) using `--input_folder`: Folder containing zipped FastQ files. Optionally, a metadata sheet can be specified for downstream analysis. +#### Samplesheet input + +The sample sheet file is a tab-separated file that must have two to four columns with the following headers: + +| Column | Necessity | Description | +| ------------ | --------- | ----------------------------------------------------------------------------- | +| sampleID | required | Unique sample identifiers | +| forwardReads | required | Paths to (forward) reads zipped FastQ files | +| reverseReads | optional | Paths to reverse reads zipped FastQ files, required if the data is paired-end | +| run | optional | If the data was produced by multiple sequencing runs, any string | + +```bash +--input 'path/to/samplesheet.tsv' +``` + +For example, the samplesheet may contain: + +| sampleID | forwardReads | reverseReads | run | +| -------- | ------------------------- | ------------------------- | --- | +| sample1 | ./data/S1_R1_001.fastq.gz | ./data/S1_R2_001.fastq.gz | A | +| sample2 | ./data/S2_fw.fastq.gz | ./data/S2_rv.fastq.gz | A | +| sample3 | ./S4x.fastq.gz | ./S4y.fastq.gz | B | +| sample4 | ./a.fastq.gz | ./b.fastq.gz | B | + +Please note the following requirements: + +- 2 to 4 tab-separated columns +- Valid file extension: `.tsv` +- Must contain the header `sampleID` and `forwardReads` +- May contain the header `reverseReads` and `run` +- Sample IDs must be unique +- Sample IDs must start with a letter +- Sample IDs can only contain letters, numbers or underscores +- FastQ files must be compressed (`.fastq.gz`, `.fq.gz`) +- Within one samplesheet, only one type of raw data should be 
specified (same amplicon & sequencing method) + +An [example samplesheet](../assets/samplesheet.tsv) has been provided with the pipeline. + +To avoid producing a sample sheet, [Direct FASTQ input](#direct-fastq-input) may be used instead. + +#### ASV/OTU fasta input + +To taxonomically classify pre-computed sequence files, a fasta format file with sequences may be provided. +Most of the steps of the pipeline will be skipped, but ITSx & Barrnap & length filtering can be applied before taxonomic classification. +The sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces. + +```bash +--input_fasta 'path/to/amplicon_sequences.fasta' +``` + #### Direct FASTQ input -The easiest way is to specify directly the path to the folder that contains your input FASTQ files. For example: +An easy way to input sequencing data to the pipeline is to specify directly the path to the folder that contains your input FASTQ files. For example: ```bash ---input 'path/to/data/' +--input_folder 'path/to/data/' ``` File names must follow a specific pattern, default is `/*_R{1,2}_001.fastq.gz`, but this can be adjusted with `--extension`. @@ -148,60 +203,6 @@ Please note the following additional requirements: - Sample identifiers are extracted from file names, i.e. 
the string before the first underscore `_`, these must be unique (also across sequencing runs) - If your data is scattered, produce a sample sheet -#### Samplesheet input - -The sample sheet file is an alternative way to provide input reads, it must be a tab-separated file ending with `.tsv` that must have two to four columns with the following headers: - -| Column | Necessity | Description | -| ------------ | --------- | ----------------------------------------------------------------------------- | -| sampleID | required | Unique sample identifiers | -| forwardReads | required | Paths to (forward) reads zipped FastQ files | -| reverseReads | optional | Paths to reverse reads zipped FastQ files, required if the data is paired-end | -| run | optional | If the data was produced by multiple sequencing runs, any string | - -```bash ---input 'path/to/samplesheet.tsv' -``` - -For example, the samplesheet may contain: - -| sampleID | forwardReads | reverseReads | run | -| -------- | ------------------------- | ------------------------- | --- | -| sample1 | ./data/S1_R1_001.fastq.gz | ./data/S1_R2_001.fastq.gz | A | -| sample2 | ./data/S2_fw.fastq.gz | ./data/S2_rv.fastq.gz | A | -| sample3 | ./S4x.fastq.gz | ./S4y.fastq.gz | B | -| sample4 | ./a.fastq.gz | ./b.fastq.gz | B | - -Please note the following requirements: - -- 2 to 4 tab-separated columns -- Valid file extension: `.tsv` -- Must contain the header `sampleID` and `forwardReads` -- May contain the header `reverseReads` and `run` -- Sample IDs must be unique -- Sample IDs must not contain a dot `.` -- Sample IDs may not start with a number -- FastQ files must be compressed (`.fastq.gz`, `.fq.gz`) -- Within one samplesheet, only one type of raw data should be specified (same amplicon & sequencing method) - -An [example samplesheet](../assets/samplesheet.tsv) has been provided with the pipeline. 
- -> **Please note:** All characters other than letters, numbers and underline in Sample IDs will be converted to dots `.`. Avoid those conversions, because they might make summary files not merging correctly and will fail to match to metadata (which can be adjusted though). - -#### ASV/OTU fasta input - -When pointing at a file ending with `.fasta`, `.fna` or `.fa`, the containing ASV/OTU sequences will be taxonomically classified. -Most of the steps of the pipeline will be skipped, but ITSx & Barrnap & length filtering can be applied before taxonomic classification. -The sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces. - -```bash ---input 'path/to/amplicon_sequences.fasta' -``` - -Please note the following requirements: - -- Valid file extensions: `.fasta`, `.fna` or `.fa` - ### Metadata Metadata is optional, but for performing downstream analysis such as barplots, diversity indices or differential abundance testing, a metadata file is essential. diff --git a/nextflow_schema.json b/nextflow_schema.json index 7b3b4098..7c564f30 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,7 +16,7 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to tab-separated sample sheet", - "help_text": "Path to sample sheet ending with `.tsv` that points towards compressed fastq files\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1). 
If the `--sintax_ref_taxonomy` is given, taxonomic assignment is performed using the USEARCH sintax method in addition to DADA2 assignTaxonomy (default: DADA2 assignTaxonomy and 16S rRNA sequence database)\n\n\n##### Sample sheet\n\nThe sample sheet file is an alternative way to provide input reads, it must be a tab-separated file ending with `.tsv` that must have two to four columns with the following headers: \n- `sampleID` (required): Unique sample identifiers, any unique string (may not contain dots `.`, must not start with a number)\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nFor example:\n\n```bash\n--input 'path/to/samplesheet.tsv'\n```\n\n", + "help_text": "Path to sample sheet ending with `.tsv` that points towards compressed fastq files\n\nThe sample sheet must have two to four tab-separated columns with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", "schema": 
"assets/schema_input.json" }, "input_fasta": { @@ -24,14 +24,14 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to ASV/OTU fasta file", - "help_text": "Fasta file ending with `.fasta`, `.fna` or `.fa` that will be taxonomically classified\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1). If the `--sintax_ref_taxonomy` is given, taxonomic assignment is performed using the USEARCH sintax method in addition to DADA2 assignTaxonomy (default: DADA2 assignTaxonomy and 16S rRNA sequence database)\n\n##### Fasta file\n\nWhen pointing at a file ending with `.fasta`, `.fna` or `.fa`, the containing sequences will be taxonomically classified. All other pipeline steps will be skipped.\n\nThe sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nThe fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nFor example:\n\n```bash\n--input 'path/to/amplicon_sequences.fasta'\n```" + "help_text": "Path to fasta format file with sequences that will be taxonomically classified. The fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nThe fasta sequence header line may contain a description, that will be kept as part of the sequence name. 
However, tabs will be changed into spaces.\n\nRelated parameters are:\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "input_folder": { "type": "string", "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to folder containing zipped FastQ files", - "help_text": "Path to folder containing compressed fastq files\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1). If the `--sintax_ref_taxonomy` is given, taxonomic assignment is performed using the USEARCH sintax method in addition to DADA2 assignTaxonomy (default: DADA2 assignTaxonomy and 16S rRNA sequence database)\n\n##### Folder containing zipped FastQ files\n\nFor example:\n\n```bash\n--input 'path/to/data'\n```\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. 
If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure." + "help_text": "Path to folder containing compressed fastq files.\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. 
Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "FW_primer": { "type": "string", From d5ab76d9afa2e1915ea5c424415a5c146a606926 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 11 Aug 2023 16:00:56 +0200 Subject: [PATCH 121/230] update WorkflowAmpliseq.groovy --- lib/WorkflowAmpliseq.groovy | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 39e5ce6b..bbc4466a 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -11,6 +11,10 @@ class WorkflowAmpliseq { // Check and validate parameters // public static void initialise(params, log) { + if ( !params.input && !params.input_fasta && !params.input_folder ) { + Nextflow.error("Missing input declaration: One of `--input`, `--input_fasta`, `--input_folder` is required.") + } + if ( params.pacbio || params.iontorrent || params.single_end ) { if (params.trunclenr) { log.warn "Unused parameter: `--trunclenr` is ignored because the data is single end." 
} } else if (params.trunclenf && !params.trunclenr) { @@ -113,13 +117,6 @@ class WorkflowAmpliseq { } } - // - // Check string (String s) ends with one entry of an array of strings ("String[] extn") - // - public static boolean checkIfFileHasExtension(String s, String[] extn) { - return Arrays.stream(extn).anyMatch(entry -> s.endsWith(entry)); - } - // // Get workflow summary for MultiQC // From e7022718c82ffca3849e309f06b24b041bd04e52 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 11 Aug 2023 16:03:19 +0200 Subject: [PATCH 122/230] move param validity test to WorkflowAmpliseq.groovy --- lib/WorkflowAmpliseq.groovy | 4 ++++ workflows/ampliseq.nf | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index bbc4466a..c256f615 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -15,6 +15,10 @@ class WorkflowAmpliseq { Nextflow.error("Missing input declaration: One of `--input`, `--input_fasta`, `--input_folder` is required.") } + if ( !params.input_fasta && (!params.FW_primer || !params.RV_primer) && !params.skip_cutadapt ) { + Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for primer trimming. If primer trimming is not needed, use `--skip_cutadapt`.") + } + if ( params.pacbio || params.iontorrent || params.single_end ) { if (params.trunclenr) { log.warn "Unused parameter: `--trunclenr` is ignored because the data is single end." 
} } else if (params.trunclenf && !params.trunclenr) { diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 2a8c1f05..f490a838 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -91,10 +91,6 @@ if ( !single_end && !params.illumina_pe_its && (params.trunclenf == null || para log.warn "No DADA2 cutoffs were specified (`--trunclenf` & `--trunclenr`), therefore reads will be truncated where median quality drops below ${params.trunc_qmin} (defined by `--trunc_qmin`) but at least a fraction of ${params.trunc_rmin} (defined by `--trunc_rmin`) of the reads will be retained.\nThe chosen cutoffs do not account for required overlap for merging, therefore DADA2 might have poor merging efficiency or even fail.\n" } else { find_truncation_values = false } -if ( !params.input_fasta && (!params.FW_primer || !params.RV_primer) && !params.skip_cutadapt ) { - error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for primer trimming. If primer trimming is not needed, use `--skip_cutadapt`.") -} - // save params to values to be able to overwrite it tax_agglom_min = params.tax_agglom_min tax_agglom_max = params.tax_agglom_max From 9c648d19cb9671075c69f941f0618b44a1f7b84c Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 15 Aug 2023 16:31:43 +0200 Subject: [PATCH 123/230] fix fasta input --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index f490a838..adad11cf 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -749,7 +749,7 @@ workflow AMPLISEQ { ch_metadata.ifEmpty( [] ), params.input ? file(params.input) : [], // samplesheet input ch_input_fasta.ifEmpty( [] ), // fasta input - !params.skip_fastqc && !params.skip_multiqc ? MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") + !params.input_fasta && !params.skip_fastqc && !params.skip_multiqc ? 
MULTIQC.out.plots : [], //.collect().flatten().collectFile(name: "mqc_fastqc_per_sequence_quality_scores_plot_1.svg") !params.skip_cutadapt ? CUTADAPT_WORKFLOW.out.summary.collect().ifEmpty( [] ) : [], find_truncation_values, DADA2_PREPROCESSING.out.args.first().ifEmpty( [] ), From a358724ca1a89d74ecf3798a567cdd27b83fa62f Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 16 Aug 2023 14:25:45 +0200 Subject: [PATCH 124/230] tsv, csv, yml, yaml are now allowed as sample sheet --- CHANGELOG.md | 3 ++- docs/usage.md | 6 +++--- nextflow_schema.json | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9e8ee1a..d296dbae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#616](https://github.com/nf-core/ampliseq/pull/616) - When using a sample sheet with `--input` containing forward an reverse reads, specifying `--single_end` will only extract forward reads and treat the data as single ended instead of extracting forward and reverse reads. - [#616](https://github.com/nf-core/ampliseq/pull/616) - `--input` was split into three params: (1) `--input` for samplesheet, (2) `--input_fasta` for ASV/OTU fasta input, (3) `--input_folder` direct FASTQ input | Param updated | Param old | Accepts | | ------------- | --------- | ---------------------------------------- | -| input | input | samplesheet, .tsv | +| input | input | samplesheet, .tsv/.csv/.yml/.yaml | | input_fasta | input | ASV/OTU sequences, .fasta | | input_folder | input | Folder containing compressed fastq files | diff --git a/docs/usage.md b/docs/usage.md index b1fecaa8..4df75b45 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -105,7 +105,7 @@ Optionally, a metadata sheet can be specified for downstream analysis. 
#### Samplesheet input -The sample sheet file is a tab-separated file that must have two to four columns with the following headers: +The sample sheet file can be tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml) that must have two to four columns/entries with the following headers: | Column | Necessity | Description | | ------------ | --------- | ----------------------------------------------------------------------------- | @@ -129,8 +129,8 @@ For example, the samplesheet may contain: Please note the following requirements: -- 2 to 4 tab-separated columns -- Valid file extension: `.tsv` +- 2 to 4 columns/entries +- Valid file extensions: `.tsv`,`.csv`,`.yml`,`.yaml` - Must contain the header `sampleID` and `forwardReads` - May contain the header `reverseReads` and `run` - Sample IDs must be unique diff --git a/nextflow_schema.json b/nextflow_schema.json index 7c564f30..515f4a2e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,7 +16,7 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to tab-separated sample sheet", - "help_text": "Path to sample sheet ending with `.tsv` that points towards compressed fastq files\n\nThe sample sheet must have two to four tab-separated columns with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, 
`--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", + "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points towards compressed fastq files\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", "schema": "assets/schema_input.json" }, "input_fasta": { @@ -88,7 +88,8 @@ }, "single_end": { "type": "boolean", - "description": "If data is single-ended Illumina reads instead of paired-end" + "description": "If data is single-ended Illumina reads instead of paired-end", + "help_text": "When using a sample sheet with `--input` containing forward an reverse reads, specifying `--single_end` will only extract forward reads and treat the data as single ended instead of extracting forward and reverse reads." 
}, "illumina_pe_its": { "type": "boolean", From d925f8b4436d8ca674f4f7416bd9505761513c1b Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 16 Aug 2023 14:36:00 +0200 Subject: [PATCH 125/230] update single_end test --- conf/test_single.config | 3 ++- tests/pipeline/single.nf.test.snap | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/conf/test_single.config b/conf/test_single.config index 4050ad67..b24e852b 100644 --- a/conf/test_single.config +++ b/conf/test_single.config @@ -22,7 +22,8 @@ params { // Input data FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = "https://github.com/nf-core/test-datasets/raw/ampliseq/samplesheets/Samplesheet_single_end.tsv" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + single_end = true dada_ref_taxonomy = "rdp=18" cut_dada_ref_taxonomy = true diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index bd9096d0..33688a52 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -1,15 +1,15 @@ { "input": { "content": [ - "Samplesheet_single_end.tsv:md5,71bcb9920b1187571ba9e2a5759ee4a5" + "Samplesheet_single_end.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" ], - "timestamp": "2023-05-28T20:35:33+0000" + "timestamp": "2023-08-16T20:35:33+0000" }, "cutadapt": { "content": [ - "cutadapt_summary.tsv:md5,5e5bfa4a7324a44f6d9e3cb0978ca291" + "cutadapt_summary.tsv:md5,cde6a72b1f0daccb7b69727834fbb9e5" ], - "timestamp": "2023-05-28T20:35:33+0000" + "timestamp": "2023-08-16T20:35:33+0000" }, "software_versions": { "content": [ @@ -19,20 +19,20 @@ }, "overall_summary_tsv": { "content": [ - "overall_summary.tsv:md5,0feea9a92fde36cbf63dba6e63617c7e" + "overall_summary.tsv:md5,9c37a0292537273537640cdb0dd8fba5" ], - "timestamp": "2023-05-28T20:35:33+0000" + "timestamp": "2023-08-16T20:35:33+0000" }, "dada2": { "content": [ 
"ASV_seqs.fasta:md5,d452ff8b8a306b52ffc6db7e4396c6db", - "ASV_table.tsv:md5,a1226d8573fc0595161d4b2b5ac63cac", + "ASV_table.tsv:md5,06b93679e1f67a8707d2cc7edf345340", "ref_taxonomy.rdp_18.txt:md5,815c4fce9f3d1de019fb995a43fb66ed", - "DADA2_stats.tsv:md5,8386cc209c1f64237deeec79f75b075b", - "DADA2_table.rds:md5,aefd24f6ac2753a43baca19a93c4e2ee", - "DADA2_table.tsv:md5,3e5280fd5b36c943c0148c4d5b50cb65" + "DADA2_stats.tsv:md5,d4802595db56db3ae706f1650a774e5c", + "DADA2_table.rds:md5,a8e68947cb81f49a36d243619fe5e2f0", + "DADA2_table.tsv:md5,27c340a79b092d8ebea347f9d9324996" ], - "timestamp": "2023-05-28T20:35:33+0000" + "timestamp": "2023-08-16T20:35:33+0000" }, "barrnap": { "content": [ @@ -45,10 +45,10 @@ }, "multiqc": { "content": [ - "multiqc_fastqc.txt:md5,0ea2e6e2d327d66e778e9ff5d03d933b", - "multiqc_general_stats.txt:md5,5040629a246bb3288879d3d30e9d6f40", - "multiqc_cutadapt.txt:md5,48d079bea04fe93d260b980c15793a0c" + "multiqc_fastqc.txt:md5,16ff42a422cd6c6a787c97febe12aa69", + "multiqc_general_stats.txt:md5,d22c32eed33d046503751b23670db5e4", + "multiqc_cutadapt.txt:md5,4311a83074d94040405937e02773c5a9" ], - "timestamp": "2023-05-28T20:35:33+0000" + "timestamp": "2023-08-16T20:35:33+0000" } } From 8f5b9cbc757b0352a143b3a0a8f086a64aade1b2 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 16 Aug 2023 14:49:45 +0200 Subject: [PATCH 126/230] update file names --- tests/pipeline/single.nf.test | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index 02d54e9e..11bb9156 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -24,10 +24,10 @@ nextflow_pipeline { path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") }, { assert new File("$outputDir/barrnap/summary.tsv").exists() }, { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") }, - { assert new 
File("$outputDir/cutadapt/1a_S103_L001_R1_001.trimmed.cutadapt.log").exists() }, - { assert new File("$outputDir/cutadapt/1_S103_L001_R1_001.trimmed.cutadapt.log").exists() }, - { assert new File("$outputDir/cutadapt/2a_S115_L001_R1_001.trimmed.cutadapt.log").exists() }, - { assert new File("$outputDir/cutadapt/2_S115_L001_R1_001.trimmed.cutadapt.log").exists() }, + { assert new File("$outputDir/cutadapt/sampleID_1a.trimmed.cutadapt.log").exists() }, + { assert new File("$outputDir/cutadapt/sampleID_1.trimmed.cutadapt.log").exists() }, + { assert new File("$outputDir/cutadapt/sampleID_2a.trimmed.cutadapt.log").exists() }, + { assert new File("$outputDir/cutadapt/sampleID_2.trimmed.cutadapt.log").exists() }, { assert new File("$outputDir/cutadapt/assignTaxonomy.cutadapt.log").exists() }, { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"), path("$outputDir/dada2/ASV_table.tsv"), @@ -37,11 +37,11 @@ nextflow_pipeline { path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, { assert new File("$outputDir/dada2/ASV_tax.rdp_18.tsv").exists() }, { assert new File("$outputDir/dada2/ASV_tax_species.rdp_18.tsv").exists() }, - { assert new File("$outputDir/fastqc/1a_S103_L001_R1_001_fastqc.html").exists() }, - { assert new File("$outputDir/fastqc/1_S103_L001_R1_001_fastqc.html").exists() }, - { assert new File("$outputDir/fastqc/2a_S115_L001_R1_001_fastqc.html").exists() }, - { assert new File("$outputDir/fastqc/2_S115_L001_R1_001_fastqc.html").exists() }, - { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, + { assert new File("$outputDir/fastqc/sampleID_1a_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_fastqc.html").exists() }, + { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert 
snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, From 9d85ce151430184bd61c0610e4bc5e34c6cb7cb9 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 16 Aug 2023 15:02:25 +0200 Subject: [PATCH 127/230] update one more file name --- tests/pipeline/single.nf.test.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 33688a52..ad74ef74 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -1,7 +1,7 @@ { "input": { "content": [ - "Samplesheet_single_end.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" + "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" ], "timestamp": "2023-08-16T20:35:33+0000" }, From 7747d9e95b8ec5a7994b48433bbeaec5252e8f1f Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 17 Aug 2023 09:04:17 +0200 Subject: [PATCH 128/230] Fix duplication from inclusion of dev --- lib/WorkflowMain.groovy | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 54128f4a..7f49735e 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -37,9 +37,6 @@ class WorkflowMain { if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { qiimereftaxonomyExistsError(params, log) } - if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { - sintaxreftaxonomyExistsError(params, log) - } // Print workflow version and exit on --version if (params.version) { @@ -99,17 +96,4 @@ class WorkflowMain { Nextflow.error(error_string) } } - // - // Exit pipeline if incorrect --qiime_ref_taxonomy key provided - // - private static void sintaxreftaxonomyExistsError(params, log) { - if (params.sintax_ref_databases && params.sintax_ref_taxonomy &&
!params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy)) { - def error_string = "=============================================================================\n" + - " SINTAX reference database '${params.sintax_ref_taxonomy}' not found in any config files provided to the pipeline.\n" + - " Currently, the available reference taxonomy keys for `--sintax_ref_taxonomy` are:\n" + - " ${params.sintax_ref_databases.keySet().join(", ")}\n" + - "===================================================================================" - Nextflow.error(error_string) - } - } } From 0b1572ffa320cefe8d83073c00a3d3594cd4ee1c Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 17 Aug 2023 09:10:09 +0200 Subject: [PATCH 129/230] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd2ca8b0..8b703bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#605](https://github.com/nf-core/ampliseq/pull/605) - Make `--sbdiexport` compatible with PR2 version 5.0.0 - [#614](https://github.com/nf-core/ampliseq/pull/614) - Template update for nf-core/tools version 2.9 +- [#617](https://github.com/nf-core/ampliseq/pull/617) - Fix database compatibility check for `--sbdiexport` ### `Dependencies` From ac1d1a76ad3435b81943082630c4355e500d89c4 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 17 Aug 2023 11:07:20 +0200 Subject: [PATCH 130/230] update summary report params --- assets/report_template.Rmd | 20 ++++++++++---------- modules/local/summary_report.nf | 10 +++++----- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index a14cdc6c..2483205d 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -60,9 +60,9 @@ params: # file paths metadata: FALSE - samplesheet: FALSE - fasta: FALSE - input: FALSE + input_samplesheet: FALSE + input_fasta: 
FALSE + input_folder: FALSE mqc_plot: FALSE cutadapt_summary: FALSE dada_filtntrim_args: FALSE @@ -184,22 +184,22 @@ Pipeline input was saved in folder [input](../input). ")) } -if ( !isFALSE(params$samplesheet) ) { +if ( !isFALSE(params$input_samplesheet) ) { # samplesheet input - cat("\nSequencing data was provided in the samplesheet file `", params$samplesheet, "` that is displayed below:", sep="") + cat("\nSequencing data was provided in the samplesheet file `", params$input_samplesheet, "` that is displayed below:", sep="") - samplesheet <- read.table(file = params$samplesheet, header = TRUE, sep = "\t") + samplesheet <- read.table(file = params$input_samplesheet, header = TRUE, sep = "\t") # Display table datatable(samplesheet, options = list( scrollX = TRUE, scrollY = "300px", paging = FALSE)) -} else if ( !isFALSE(params$fasta) ) { +} else if ( !isFALSE(params$input_fasta) ) { # fasta input - cat("\nASV/OTU sequences were provided in the fasta file `", params$fasta, "`. ", sep="") -} else if ( !isFALSE(params$input) ) { + cat("\nASV/OTU sequences were provided in the fasta file `", params$input_fasta, "`. ", sep="") +} else if ( !isFALSE(params$input_folder) ) { # folder input - cat("\nSequencing data was retrieved from folder `", params$fasta, "`. ", sep="") + cat("\nSequencing data was retrieved from folder `", params$input_folder, "`. 
", sep="") } if ( !isFALSE(params$metadata) ) { cat("\nMetadata associated with the sequencing data was provided in `", params$metadata, "` and is displayed below:", sep="") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 8af605c6..f3872123 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -12,8 +12,8 @@ process SUMMARY_REPORT { path(report_logo) path(report_abstract) path(metadata) - path(samplesheet) - path(fasta) + path(input_samplesheet) + path(input_fasta) path(mqc_plots) path(cutadapt_summary) val(find_truncation_values) @@ -73,9 +73,9 @@ process SUMMARY_REPORT { report_abstract ? "report_abstract='$params.report_abstract'" : "", meta.single_end ? "flag_single_end=TRUE" : "", metadata ? "metadata='$metadata'" : "", - samplesheet ? "samplesheet='$samplesheet'" : "", - fasta ? "fasta='$fasta'" : "", - !fasta && !samplesheet ? "input='$params.input'" : "", + input_samplesheet ? "input_samplesheet='$input_samplesheet'" : "", + input_fasta ? "input_fasta='$input_fasta'" : "", + !input_fasta && !input_samplesheet ? "input_folder='$params.input_folder'" : "", mqc_plots ? "mqc_plot='${mqc_plots}/svg/mqc_fastqc_per_sequence_quality_scores_plot_1.svg'" : "", cutadapt_summary ? params.retain_untrimmed ? "flag_retain_untrimmed=TRUE,cutadapt_summary='$cutadapt_summary'" : From 7342998e89beafb5032e44ccb37ea8a8204375b1 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 17 Aug 2023 11:08:12 +0200 Subject: [PATCH 131/230] correct info on accepted files --- docs/usage.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 4df75b45..1d6632ee 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -97,7 +97,7 @@ You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-c The input data can be passed to nf-core/ampliseq in three possible ways using the parameters `--input`, `--input_fasta`, or `--input_folder`. 
The three parameters and input types are mutually exclusive. -- [Samplesheet input](#samplesheet-input) using `--input`: Tab-separated samplesheet +- [Samplesheet input](#samplesheet-input) using `--input`: Samplesheet tab-separated, comma-separated, or in YAML format - [ASV/OTU fasta input](#asvotu-fasta-input) using `--input_fasta`: Fasta file with sequences to be taxonomically classified - [Direct FASTQ input](#direct-fastq-input) using `--input_folder`: Folder containing zipped FastQ files. @@ -118,7 +118,7 @@ The sample sheet file can be tab-separated (.tsv), comma-separated (.csv), or in --input 'path/to/samplesheet.tsv' ``` -For example, the samplesheet may contain: +For example, the tab-separated samplesheet may contain: | sampleID | forwardReads | reverseReads | run | | -------- | ------------------------- | ------------------------- | --- | @@ -130,7 +130,7 @@ For example, the samplesheet may contain: Please note the following requirements: - 2 to 4 columns/entries -- Valid file extensions: `.tsv`,`.csv`,`.yml`,`.yaml` +- File extensions `.tsv`,`.csv`,`.yml`,`.yaml` specify the file type, otherwise file type will be derived from content, if possible - Must contain the header `sampleID` and `forwardReads` - May contain the header `reverseReads` and `run` - Sample IDs must be unique From daaac71a22fb5afadf2d421c8f7525c3aed87edf Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Thu, 17 Aug 2023 11:09:35 +0200 Subject: [PATCH 132/230] Apply suggestions from code review Co-authored-by: Daniel Lundin --- CHANGELOG.md | 2 +- docs/usage.md | 2 +- nextflow_schema.json | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d296dbae..cc62b1d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [#616](https://github.com/nf-core/ampliseq/pull/616) - When using a 
sample sheet with `--input` containing forward an reverse reads, specifying `--single_end` will only extract forward reads and treat the data as single ended instead of extracting forward and reverse reads. +- [#616](https://github.com/nf-core/ampliseq/pull/616) - When using a sample sheet with `--input` containing forward and reverse reads, specifying `--single_end` will only extract forward reads and treat the data as single ended instead of extracting forward and reverse reads. - [#616](https://github.com/nf-core/ampliseq/pull/616) - `--input` was split into three params: (1) `--input` for samplesheet, (2) `--input_fasta` for ASV/OTU fasta input, (3) `--input_folder` direct FASTQ input | Param updated | Param old | Accepts | diff --git a/docs/usage.md b/docs/usage.md index 4df75b45..239c6d67 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -105,7 +105,7 @@ Optionally, a metadata sheet can be specified for downstream analysis. #### Samplesheet input -The sample sheet file can be tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml) that must have two to four columns/entries with the following headers: +The sample sheet file can be tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml) and can have two to four columns/entries with the following headers: | Column | Necessity | Description | | ------------ | --------- | ----------------------------------------------------------------------------- | diff --git a/nextflow_schema.json b/nextflow_schema.json index 515f4a2e..f1f39bce 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,7 +16,7 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to tab-separated sample sheet", - "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points towards compressed fastq files\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- 
`sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", + "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon 
(16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", "schema": "assets/schema_input.json" }, "input_fasta": { @@ -89,7 +89,7 @@ "single_end": { "type": "boolean", "description": "If data is single-ended Illumina reads instead of paired-end", - "help_text": "When using a sample sheet with `--input` containing forward an reverse reads, specifying `--single_end` will only extract forward reads and treat the data as single ended instead of extracting forward and reverse reads." + "help_text": "When using a sample sheet with `--input` containing forward and reverse reads, specifying `--single_end` will only extract forward reads and treat the data as single ended instead of extracting forward and reverse reads." }, "illumina_pe_its": { "type": "boolean", From 2c2ea1e6bc44d96078af5cbc832883c9c5a7bc5f Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 17 Aug 2023 11:53:39 +0200 Subject: [PATCH 133/230] test if paired end samplesheet when not --single_end --- workflows/ampliseq.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index adad11cf..edb459c3 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -225,6 +225,7 @@ workflow AMPLISEQ { .map{ meta, readfw, readrv -> meta.single_end = single_end.toBoolean() def reads = single_end ? 
readfw : [readfw,readrv] + if ( !meta.single_end && !readrv ) { error("Entry `reverseReads` is missing in $params.input for $meta.id, either correct the samplesheet or use `--single_end`, `--pacbio`, or `--iontorrent`") } // make sure that reverse reads are present when single_end isnt specified return [meta, reads] } } else if ( params.input_fasta ) { ch_input_fasta = Channel.fromPath(params.input_fasta, checkIfExists: true) @@ -232,7 +233,7 @@ workflow AMPLISEQ { PARSE_INPUT ( params.input_folder, single_end, params.multiple_sequencing_runs, params.extension ) ch_input_reads = PARSE_INPUT.out.reads } else { - error "One of --input, --input_fasta, --input_folder must be provided!" + error("One of `--input`, `--input_fasta`, `--input_folder` must be provided!") } //Filter empty files From ea07526032808d0c77b408bb070d2686304a8a65 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 17 Aug 2023 14:27:55 +0200 Subject: [PATCH 134/230] add SBDI export and phyloseq to summary report --- CHANGELOG.md | 1 + README.md | 1 + assets/report_template.Rmd | 37 +++++++++++++++++++++++++++++++-- modules/local/summary_report.nf | 5 ++++- workflows/ampliseq.nf | 4 +++- 5 files changed, 44 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d52f5d0..4bdd16a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#558](https://github.com/nf-core/ampliseq/pull/558) - Pipeline summary report +- [#615](https://github.com/nf-core/ampliseq/pull/615) - Phyloseq R object creation ### `Changed` diff --git a/README.md b/README.md index e6b84050..368a3eb2 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,7 @@ nf-core/ampliseq was originally written by Daniel Straub ([@d4straub](https://gi We thank the following people for their extensive assistance in the development of this pipeline (in alphabetical order): +- [Adam Bennett](https://github.com/a4000) - [Diego 
Brambilla](https://github.com/DiegoBrambilla) - [Emelie Nilsson](https://github.com/emnilsson) - [Jeanette Tångrot](https://github.com/jtangrot) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 2483205d..c2cbee1a 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -95,6 +95,8 @@ params: diversity_indices_beta: FALSE diversity_indices_adonis: "" picrust_pathways: FALSE + sbdi: FALSE + phyloseq: FALSE --- @@ -1379,11 +1381,11 @@ for (folder in ancom) { } ``` - + ```{r, eval = !isFALSE(params$picrust_pathways), results='asis'} cat(paste0(" -## PICRUSt2 +# PICRUSt2 [PICRUSt2](https://pubmed.ncbi.nlm.nih.gov/32483366/) (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences. @@ -1394,6 +1396,37 @@ see `METACYC_path_abun_unstrat_descrip.tsv`. Quantifications are not normalized ")) ``` + + +```{r, eval = !isFALSE(params$sbdi), results='asis'} +cat(paste0(" +# SBDI + +The [Swedish Biodiversity Infrastructure (SBDI)](https://biodiversitydata.se/) provides a cost-effective, cutting-edge +infrastructure that supports Swedish and international biodiversity and ecosystems research. +Files in preparation for submission to SBDI can be found in folder [SBDI](../SBDI/). +Tables are generated from the DADA2 denoising and taxonomy assignment steps. +Each table, except `annotation.tsv`, corresponds to one tab in the [SBDI submission template](https://asv-portal.biodiversitydata.se/submit). +Most of the fields in the template will not be populated, +but if you run nf-core/ampliseq with a sample metadata table (`--metadata`) any fields corresponding to a field in the template will be used. +")) +``` + + + +```{r, eval = !isFALSE(params$phyloseq), results='asis'} +cat(paste0(" +# Phyloseq + +[Phyloseq](https://doi.org/10.1371/journal.pone.0061217) +is a popular R package to analyse and visualize microbiome data. 
+The produced RDS files contain phyloseq objects and can be loaded directly into R and phyloseq. +The objects contain an ASV abundance table and a taxonomy table. +If available, metadata and phylogenetic tree will also be included in the phyloseq object. +The files can be found in folder [phyloseq](../phyloseq/). +")) +``` + # Methods diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index f3872123..582bc90f 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -50,7 +50,8 @@ process SUMMARY_REPORT { path(diversity_indices_adonis, stageAs: 'beta_diversity/adonis/*') // prevent folder name collisons path(ancom) path(picrust_pathways) - + path(sbdi, stageAs: 'sbdi/*') + path(phyloseq, stageAs: 'phyloseq/*') output: path "*.svg" , emit: svg, optional: true @@ -119,6 +120,8 @@ process SUMMARY_REPORT { diversity_indices ? "diversity_indices_depth='$diversity_indices',diversity_indices_beta='"+ diversity_indices_beta.join(",") +"'" : "", diversity_indices_adonis ? "diversity_indices_adonis='"+ diversity_indices_adonis.join(",") +"',qiime_adonis_formula='$params.qiime_adonis_formula'" : "", ancom ? "ancom='"+ ancom.join(",") +"'" : "", + sbdi ? "sbdi='"+ sbdi.join(",") +"'" : "", + phyloseq ? "phyloseq='"+ phyloseq.join(",") +"'" : "", ] // groovy list to R named list string; findAll removes empty entries params_list_named_string = params_list_named.findAll().join(',').trim() diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index edb459c3..560a6f89 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -798,7 +798,9 @@ workflow AMPLISEQ { run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.beta.collect().ifEmpty( [] ) : [], run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect().ifEmpty( [] ) : [], run_qiime2 && !params.skip_ancom && params.metadata ? 
QIIME2_ANCOM.out.ancom.collect().ifEmpty( [] ) : [], - params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [] + params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [], + params.sbdiexport ? SBDIEXPORT.out.sbditables.mix(SBDIEXPORTREANNOTATE.out.sbdiannottables).ifEmpty( [] ) : [], + !params.skip_taxonomy ? PHYLOSEQ_WORKFLOW.out.rds.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : [] ) ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) } From 671d54fa9cd316a6634372a72e34814534edf10a Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 17 Aug 2023 14:33:37 +0200 Subject: [PATCH 135/230] update changelog --- CHANGELOG.md | 2 +- workflows/ampliseq.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bdd16a9..8b4a41fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#558](https://github.com/nf-core/ampliseq/pull/558) - Pipeline summary report +- [#558](https://github.com/nf-core/ampliseq/pull/558),[#619](https://github.com/nf-core/ampliseq/pull/619) - Pipeline summary report - [#615](https://github.com/nf-core/ampliseq/pull/615) - Phyloseq R object creation ### `Changed` diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 560a6f89..a5e6f6be 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -799,7 +799,7 @@ workflow AMPLISEQ { run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect().ifEmpty( [] ) : [], run_qiime2 && !params.skip_ancom && params.metadata ? QIIME2_ANCOM.out.ancom.collect().ifEmpty( [] ) : [], params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [], - params.sbdiexport ? SBDIEXPORT.out.sbditables.mix(SBDIEXPORTREANNOTATE.out.sbdiannottables).ifEmpty( [] ) : [], + params.sbdiexport ? SBDIEXPORT.out.sbditables.mix(SBDIEXPORTREANNOTATE.out.sbdiannottables).collect().ifEmpty( [] ) : [], !params.skip_taxonomy ? 
PHYLOSEQ_WORKFLOW.out.rds.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : [] ) ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) From 5ec141d9f9d13a978f647c25798ef474924ab4e5 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 17 Aug 2023 15:13:13 +0200 Subject: [PATCH 136/230] minimum update of multiqc methods --- assets/methods_description_template.yml | 5 +++-- nextflow.config | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 625b2446..3f1b1e44 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,16 +3,17 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/ampliseq Methods Description" section_href: "https://github.com/nf-core/ampliseq" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

    Methods

    -

    Data was processed using nf-core/ampliseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

    +

    Data was processed using nf-core/ampliseq v${workflow.manifest.version} ${doi_text} (Straub et al., 2020) of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

    The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

    ${workflow.commandLine}

    ${tool_citations}

    References