From 9652a7e4f21097a58144869ba1ad6d3601b5ad42 Mon Sep 17 00:00:00 2001
From: Monika Brandt <monika.brandt@medsci.uu.se>
Date: Wed, 26 Jun 2024 13:46:47 +0200
Subject: [PATCH 1/4] Use customized list of adapters, FastQC

When hovering over the adapter content plot in the FastQC output
displayed at the end of all seqreports, the name of the adapter found
appears. The adapters and names used are by default the ones
specified in the FastQC software. However, the provided list is outdated
and many of the adapters are used in several library preparation kits. A
customized list can now be used as input instead to look for adapters we
use at SNP&SEQ and to display relevant names when found.

Files changed:
- New file, adapter_list_fastqc.txt, added in config/tool_config/ to
  specify the adapters to look for.
- The fastqc command in main.nf is now updated with the additional
  -a flag, which specifies the adapter list file described in the
  previous point.
---
 config/tool_config/adapter_list_fastqc.txt    | 31 +++++++++++++++++++
 main.nf                                       |  6 ++--
 test_data/test_config/adapter_list_fastqc.txt |  1 +
 3 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 config/tool_config/adapter_list_fastqc.txt
 create mode 120000 test_data/test_config/adapter_list_fastqc.txt

diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt
new file mode 100644
index 0000000..f9c969c
--- /dev/null
+++ b/config/tool_config/adapter_list_fastqc.txt
@@ -0,0 +1,31 @@
+# This file is copied and modified from 
+# https://github.com/s-andrews/FastQC/blob/master/Configuration/adapter_list.txt
+# The file has been modified according to
+# https://knowledge.illumina.com/library-preparation/general/library-preparation-general-reference_material-list/000001314
+# 
+# This file contains a set of sequence fragments which will be explicitly
+# searched against your library.  The reporting will be similar to the 
+# Kmer plot, except that every sequence in this list will be shown so 
+# you can use this to judge the level of adapter read-through even if those
+# adapter sequences aren't picked out by the Kmer module.
+#
+# Since every sequence here will be analysed and the results plotted it 
+# doesn't make any sense to include duplicate sequences, or to add too
+# many sequences since your plot will end up a mess.
+#
+# You can add more sequences to the file by putting one line per entry
+# and specifying a name[tab]sequence.  If the contaminant you add is 
+# likely to be of use to others please consider sending it to the FastQ
+# authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
+# or by directly emailing simon.andrews@babraham.ac.uk so other users of
+# the program can benefit.
+#
+# For the time being it's going to be easier to interpret this plot if all
+# of the sequences provided are the same length, so we've gone with 12bp
+# fragments for now.
+
+Illumina TruSeq DNA Adapter					AGATCGGAAGAG
+Illumina TruSeq Small RNA Adapter				TGGAATTCTCGG
+Illumina Stranded Total RNA Prep, Ligation with Ribo-Zero Plus				CTGTCTCTTATA
+PolyA										AAAAAAAAAAAA
+PolyG										GGGGGGGGGGGG
diff --git a/main.nf b/main.nf
index e2230ee..b5966c1 100644
--- a/main.nf
+++ b/main.nf
@@ -135,7 +135,8 @@ workflow CHECK_RUN_QUALITY {
         GET_QC_THRESHOLDS(run_folder)
         GET_METADATA(run_folder)
         project_and_reads = get_project_and_reads(params.run_folder)
-        FASTQC(project_and_reads)
+        FASTQC(project_and_reads,
+             params.config_dir)
         FASTQ_SCREEN(project_and_reads,
 		     params.config_dir,
 		     params.fastqscreen_databases)
@@ -168,6 +169,7 @@ process FASTQC {
 
     input:
     tuple val(project), path(fastq_file)
+    path config_dir
 
     output:
     tuple val(project), path("*_results")
@@ -175,7 +177,7 @@ process FASTQC {
     script:
     """
     mkdir -p $fastq_file"_fastqc_results"
-    fastqc -t ${task.cpus} -o $fastq_file"_fastqc_results" $fastq_file
+    fastqc -t ${task.cpus} -a "${config_dir}/adapter_list_fastqc.txt" -o $fastq_file"_fastqc_results" $fastq_file
     """
 }
 
diff --git a/test_data/test_config/adapter_list_fastqc.txt b/test_data/test_config/adapter_list_fastqc.txt
new file mode 120000
index 0000000..b289175
--- /dev/null
+++ b/test_data/test_config/adapter_list_fastqc.txt
@@ -0,0 +1 @@
+../../config/tool_config/adapter_list_fastqc.txt
\ No newline at end of file

From b0f0e664afd852d21f791a44e911854edd9144eb Mon Sep 17 00:00:00 2001
From: monikaBrandt <monikaBrandt@users.noreply.github.com>
Date: Thu, 27 Jun 2024 14:30:22 +0200
Subject: [PATCH 2/4] Update config/tool_config/adapter_list_fastqc.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Matilda Åslin <matilda.aslin@medsci.uu.se>
---
 config/tool_config/adapter_list_fastqc.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt
index f9c969c..b547dc7 100644
--- a/config/tool_config/adapter_list_fastqc.txt
+++ b/config/tool_config/adapter_list_fastqc.txt
@@ -26,6 +26,6 @@
 
 Illumina TruSeq DNA Adapter					AGATCGGAAGAG
 Illumina TruSeq Small RNA Adapter				TGGAATTCTCGG
-Illumina Stranded Total RNA Prep, Ligation with Ribo-Zero Plus				CTGTCTCTTATA
+Illumina Stranded Total RNA Prep				CTGTCTCTTATA
 PolyA										AAAAAAAAAAAA
 PolyG										GGGGGGGGGGGG

From fad7d1c141e84006f0de9249ec79f72f67824897 Mon Sep 17 00:00:00 2001
From: monikaBrandt <monikaBrandt@users.noreply.github.com>
Date: Thu, 27 Jun 2024 15:36:58 +0200
Subject: [PATCH 3/4] Update config/tool_config/adapter_list_fastqc.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Matilda Åslin <matilda.aslin@medsci.uu.se>
---
 config/tool_config/adapter_list_fastqc.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt
index b547dc7..8b6987a 100644
--- a/config/tool_config/adapter_list_fastqc.txt
+++ b/config/tool_config/adapter_list_fastqc.txt
@@ -24,7 +24,7 @@
 # of the sequences provided are the same length, so we've gone with 12bp
 # fragments for now.
 
-Illumina TruSeq DNA Adapter					AGATCGGAAGAG
+Illumina TruSeq Adapter					AGATCGGAAGAG
 Illumina TruSeq Small RNA Adapter				TGGAATTCTCGG
 Illumina Stranded Total RNA Prep				CTGTCTCTTATA
 PolyA										AAAAAAAAAAAA

From 4fdf5ce2dbc059e495d7037a4fda5c532217556a Mon Sep 17 00:00:00 2001
From: Monika Brandt <monika.brandt@medsci.uu.se>
Date: Thu, 27 Jun 2024 15:51:29 +0200
Subject: [PATCH 4/4] Fixed indentation and permalink

---
 config/tool_config/adapter_list_fastqc.txt | 2 +-
 main.nf                                    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt
index 8b6987a..920e841 100644
--- a/config/tool_config/adapter_list_fastqc.txt
+++ b/config/tool_config/adapter_list_fastqc.txt
@@ -1,5 +1,5 @@
 # This file is copied and modified from 
-# https://github.com/s-andrews/FastQC/blob/master/Configuration/adapter_list.txt
+# https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/Configuration/adapter_list.txt
 # The file has been modified according to
 # https://knowledge.illumina.com/library-preparation/general/library-preparation-general-reference_material-list/000001314
 # 
diff --git a/main.nf b/main.nf
index b5966c1..4bc10d5 100644
--- a/main.nf
+++ b/main.nf
@@ -136,10 +136,10 @@ workflow CHECK_RUN_QUALITY {
         GET_METADATA(run_folder)
         project_and_reads = get_project_and_reads(params.run_folder)
         FASTQC(project_and_reads,
-             params.config_dir)
+            params.config_dir)
         FASTQ_SCREEN(project_and_reads,
-		     params.config_dir,
-		     params.fastqscreen_databases)
+            params.config_dir,
+            params.fastqscreen_databases)
         MULTIQC_PER_FLOWCELL( params.run_folder,
             FASTQC.out.map{ it[1] }.collect(),
             FASTQ_SCREEN.out.results.map{ it[1] }.collect(),