Molmed · matrulda · Jun 28, 2024 · Jun 26, 2024 · Jun 27, 2024 · Jun 27, 2024
diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt
@@ -0,0 +1,31 @@
+# This file is copied and modified from 
+# https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/Configuration/adapter_list.txt
+# The file has been modified according to
+# https://knowledge.illumina.com/library-preparation/general/library-preparation-general-reference_material-list/000001314
+# 
+# This file contains a set of sequence fragments which will be explicitly
+# searched against your library.  The reporting will be similar to the 
+# Kmer plot, except that every sequence in this list will be shown so 
+# you can use this to judge the level of adapter read-through even if those
+# adapter sequences aren't picked out by the Kmer module.
+#
+# Since every sequence here will be analysed and the results plotted, it
+# doesn't make sense to include duplicate sequences, or to add too many
+# sequences, since your plot will end up a mess.
+#
+# You can add more sequences to the file by putting one line per entry
+# and specifying a name[tab]sequence.  If the contaminant you add is 
+# likely to be of use to others please consider sending it to the FastQC
+# authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
+# or by directly emailing simon.andrews@babraham.ac.uk so other users of
+# the program can benefit.
+#
+# For the time being it's going to be easier to interpret this plot if all
+# of the sequences provided are the same length, so we've gone with 12bp
+# fragments for now.
+
+Illumina TruSeq Adapter					AGATCGGAAGAG
+Illumina TruSeq Small RNA Adapter				TGGAATTCTCGG
+Illumina Stranded Total RNA Prep				CTGTCTCTTATA
+PolyA										AAAAAAAAAAAA
+PolyG										GGGGGGGGGGGG
diff --git a/main.nf b/main.nf
@@ -135,10 +135,11 @@ workflow CHECK_RUN_QUALITY {
         GET_QC_THRESHOLDS(run_folder)
         GET_METADATA(run_folder)
         project_and_reads = get_project_and_reads(params.run_folder)
-        FASTQC(project_and_reads)
+        FASTQC(project_and_reads,
+            params.config_dir)
         FASTQ_SCREEN(project_and_reads,
-		     params.config_dir,
-		     params.fastqscreen_databases)
+            params.config_dir,
+            params.fastqscreen_databases)
         MULTIQC_PER_FLOWCELL( params.run_folder,
             FASTQC.out.map{ it[1] }.collect(),
             FASTQ_SCREEN.out.results.map{ it[1] }.collect(),
@@ -168,14 +169,15 @@ process FASTQC {
 
     input:
     tuple val(project), path(fastq_file)
+    path config_dir
 
     output:
     tuple val(project), path("*_results")
 
     script:
     """
     mkdir -p $fastq_file"_fastqc_results"
-    fastqc -t ${task.cpus} -o $fastq_file"_fastqc_results" $fastq_file
+    fastqc -t ${task.cpus} -a "${config_dir}/adapter_list_fastqc.txt" -o $fastq_file"_fastqc_results" $fastq_file
     """
 }
 

diff --git a/test_data/test_config/adapter_list_fastqc.txt b/test_data/test_config/adapter_list_fastqc.txt
@@ -0,0 +1 @@
+../../config/tool_config/adapter_list_fastqc.txt