diff --git a/config/tool_config/adapter_list_fastqc.txt b/config/tool_config/adapter_list_fastqc.txt new file mode 100644 index 0000000..920e841 --- /dev/null +++ b/config/tool_config/adapter_list_fastqc.txt @@ -0,0 +1,31 @@ +# This file is copied and modified from +# https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/Configuration/adapter_list.txt +# The file has been modified according to +# https://knowledge.illumina.com/library-preparation/general/library-preparation-general-reference_material-list/000001314 +# +# This file contains a set of sequence fragments which will be explicitly +# searched against your library. The reporting will be similar to the +# Kmer plot, except that every sequence in this list will be shown so +# you can use this to judge the level of adapter read-through even if those +# adapter sequences aren't picked out by the Kmer module. +# +# Since every sequence here will be analysed and the results plotted it +# doesn't make any sense to include duplicate sequences, or to add too +# many sequences since your plot will end up a mess. +# +# You can add more sequences to the file by putting one line per entry +# and specifying a name[tab]sequence. If the contaminant you add is +# likely to be of use to others please consider sending it to the FastQC +# authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/ +# or by directly emailing simon.andrews@babraham.ac.uk so other users of +# the program can benefit. +# +# For the time being it's going to be easier to interpret this plot if all +# of the sequences provided are the same length, so we've gone with 12bp +# fragments for now. 
+ +Illumina TruSeq Adapter AGATCGGAAGAG +Illumina TruSeq Small RNA Adapter TGGAATTCTCGG +Illumina Stranded Total RNA Prep CTGTCTCTTATA +PolyA AAAAAAAAAAAA +PolyG GGGGGGGGGGGG diff --git a/main.nf b/main.nf index e2230ee..4bc10d5 100644 --- a/main.nf +++ b/main.nf @@ -135,10 +135,11 @@ workflow CHECK_RUN_QUALITY { GET_QC_THRESHOLDS(run_folder) GET_METADATA(run_folder) project_and_reads = get_project_and_reads(params.run_folder) - FASTQC(project_and_reads) + FASTQC(project_and_reads, + params.config_dir) FASTQ_SCREEN(project_and_reads, - params.config_dir, - params.fastqscreen_databases) + params.config_dir, + params.fastqscreen_databases) MULTIQC_PER_FLOWCELL( params.run_folder, FASTQC.out.map{ it[1] }.collect(), FASTQ_SCREEN.out.results.map{ it[1] }.collect(), @@ -168,6 +169,7 @@ process FASTQC { input: tuple val(project), path(fastq_file) + path config_dir output: tuple val(project), path("*_results") @@ -175,7 +177,7 @@ process FASTQC { script: """ mkdir -p $fastq_file"_fastqc_results" - fastqc -t ${task.cpus} -o $fastq_file"_fastqc_results" $fastq_file + fastqc -t ${task.cpus} -a "${config_dir}/adapter_list_fastqc.txt" -o $fastq_file"_fastqc_results" $fastq_file """ } diff --git a/test_data/test_config/adapter_list_fastqc.txt b/test_data/test_config/adapter_list_fastqc.txt new file mode 120000 index 0000000..b289175 --- /dev/null +++ b/test_data/test_config/adapter_list_fastqc.txt @@ -0,0 +1 @@ +../../config/tool_config/adapter_list_fastqc.txt \ No newline at end of file