From 9652a7e4f21097a58144869ba1ad6d3601b5ad42 Mon Sep 17 00:00:00 2001 From: Monika Brandt Date: Wed, 26 Jun 2024 13:46:47 +0200 Subject: [PATCH 1/4] Use customized list of adapters, FastQC When hovering over the adapter content plot in the FastQC output displayed at the end of all seqreports, the name of the adapter found appears. The adapters and names used are by default the ones specified in the FastQC software. However, the provided list is outdated and many of the adapters are used in several library preparation kits. A customized list can now be used as input instead to look for adapters we use at SNP&SEQ and to display relevant names when found. Files changed: - New file, adapter_list_fastqc.txt, added in config/tool_config/ to specify the adapters to look for. - The fastqc command in main.nf is now updated with the additional -a flag, which is used to specify the use of the file described under the previous point. --- config/tool_config/adapter_list_fastqc.txt | 31 +++++++++++++++++++ main.nf | 6 ++-- test_data/test_config/adapter_list_fastqc.txt | 1 + 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 config/tool_config/adapter_list_fastqc.txt create mode 120000 test_data/test_config/adapter_list_fastqc.txt diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt new file mode 100644 index 0000000..f9c969c --- /dev/null +++ b/config/tool_config/adapter_list_fastqc.txt @@ -0,0 +1,31 @@ +# This file is copied and modified from +# https://github.com/s-andrews/FastQC/blob/master/Configuration/adapter_list.txt +# The file has been modified according to +# https://knowledge.illumina.com/library-preparation/general/library-preparation-general-reference_material-list/000001314 +# +# This file contains a set of sequence fragments which will be explicitly +# searched against your library. 
The reporting will be similar to the +# Kmer plot, except that every sequence in this list will be shown so +# you can use this to judge the level of adapter read-through even if those +# adapter sequences aren't picked out by the Kmer module. +# +# Since every sequence here will be analysed and the results plotted it +# doesn't make any sense to include duplicate sequences, or to add too +# many sequences since your plot will end up a mess. +# +# You can add more sequences to the file by putting one line per entry +# and specifying a name[tab]sequence. If the contaminant you add is +# likely to be of use to others please consider sending it to the FastQ +# authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/ +# or by directly emailing simon.andrews@babraham.ac.uk so other users of +# the program can benefit. +# +# For the time being it's going to be easier to interpret this plot if all +# of the sequences provided are the same length, so we've gone with 12bp +# fragments for now. 
+ +Illumina TruSeq DNA Adapter AGATCGGAAGAG +Illumina TruSeq Small RNA Adapter TGGAATTCTCGG +Illumina Stranded Total RNA Prep, Ligation with Ribo-Zero Plus CTGTCTCTTATA +PolyA AAAAAAAAAAAA +PolyG GGGGGGGGGGGG diff --git a/main.nf b/main.nf index e2230ee..b5966c1 100644 --- a/main.nf +++ b/main.nf @@ -135,7 +135,8 @@ workflow CHECK_RUN_QUALITY { GET_QC_THRESHOLDS(run_folder) GET_METADATA(run_folder) project_and_reads = get_project_and_reads(params.run_folder) - FASTQC(project_and_reads) + FASTQC(project_and_reads, + params.config_dir) FASTQ_SCREEN(project_and_reads, params.config_dir, params.fastqscreen_databases) @@ -168,6 +169,7 @@ process FASTQC { input: tuple val(project), path(fastq_file) + path config_dir output: tuple val(project), path("*_results") @@ -175,7 +177,7 @@ process FASTQC { script: """ mkdir -p $fastq_file"_fastqc_results" - fastqc -t ${task.cpus} -o $fastq_file"_fastqc_results" $fastq_file + fastqc -t ${task.cpus} -a "${config_dir}/adapter_list_fastqc.txt" -o $fastq_file"_fastqc_results" $fastq_file """ } diff --git a/test_data/test_config/adapter_list_fastqc.txt b/test_data/test_config/adapter_list_fastqc.txt new file mode 120000 index 0000000..b289175 --- /dev/null +++ b/test_data/test_config/adapter_list_fastqc.txt @@ -0,0 +1 @@ +../../config/tool_config/adapter_list_fastqc.txt \ No newline at end of file From b0f0e664afd852d21f791a44e911854edd9144eb Mon Sep 17 00:00:00 2001 From: monikaBrandt Date: Thu, 27 Jun 2024 14:30:22 +0200 Subject: [PATCH 2/4] Update config/tool_config/adapter_list_fastqc.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matilda Åslin --- config/tool_config/adapter_list_fastqc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt index f9c969c..b547dc7 100644 --- a/config/tool_config/adapter_list_fastqc.txt +++ b/config/tool_config/adapter_list_fastqc.txt @@ 
-26,6 +26,6 @@ Illumina TruSeq DNA Adapter AGATCGGAAGAG Illumina TruSeq Small RNA Adapter TGGAATTCTCGG -Illumina Stranded Total RNA Prep, Ligation with Ribo-Zero Plus CTGTCTCTTATA +Illumina Stranded Total RNA Prep CTGTCTCTTATA PolyA AAAAAAAAAAAA PolyG GGGGGGGGGGGG From fad7d1c141e84006f0de9249ec79f72f67824897 Mon Sep 17 00:00:00 2001 From: monikaBrandt Date: Thu, 27 Jun 2024 15:36:58 +0200 Subject: [PATCH 3/4] Update config/tool_config/adapter_list_fastqc.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matilda Åslin --- config/tool_config/adapter_list_fastqc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt index b547dc7..8b6987a 100644 --- a/config/tool_config/adapter_list_fastqc.txt +++ b/config/tool_config/adapter_list_fastqc.txt @@ -24,7 +24,7 @@ # of the sequences provided are the same length, so we've gone with 12bp # fragments for now. 
-Illumina TruSeq DNA Adapter AGATCGGAAGAG +Illumina TruSeq Adapter AGATCGGAAGAG Illumina TruSeq Small RNA Adapter TGGAATTCTCGG Illumina Stranded Total RNA Prep CTGTCTCTTATA PolyA AAAAAAAAAAAA From 4fdf5ce2dbc059e495d7037a4fda5c532217556a Mon Sep 17 00:00:00 2001 From: Monika Brandt Date: Thu, 27 Jun 2024 15:51:29 +0200 Subject: [PATCH 4/4] Fixed indentation and permalink --- config/tool_config/adapter_list_fastqc.txt | 2 +- main.nf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt index 8b6987a..920e841 100644 --- a/config/tool_config/adapter_list_fastqc.txt +++ b/config/tool_config/adapter_list_fastqc.txt @@ -1,5 +1,5 @@ # This file is copied and modified from -# https://github.com/s-andrews/FastQC/blob/master/Configuration/adapter_list.txt +# https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/Configuration/adapter_list.txt # The file has been modified according to # https://knowledge.illumina.com/library-preparation/general/library-preparation-general-reference_material-list/000001314 # diff --git a/main.nf b/main.nf index b5966c1..4bc10d5 100644 --- a/main.nf +++ b/main.nf @@ -136,10 +136,10 @@ workflow CHECK_RUN_QUALITY { GET_METADATA(run_folder) project_and_reads = get_project_and_reads(params.run_folder) FASTQC(project_and_reads, - params.config_dir) + params.config_dir) FASTQ_SCREEN(project_and_reads, - params.config_dir, - params.fastqscreen_databases) + params.config_dir, + params.fastqscreen_databases) MULTIQC_PER_FLOWCELL( params.run_folder, FASTQC.out.map{ it[1] }.collect(), FASTQ_SCREEN.out.results.map{ it[1] }.collect(),