From 93e1ec293e1a4955b09908339aaa74a8c147c04d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 16 Oct 2018 20:33:48 +0200 Subject: [PATCH 01/11] More MultiQC results :-) --- main.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.nf b/main.nf index e98243c7f..10eaadc6e 100644 --- a/main.nf +++ b/main.nf @@ -614,12 +614,14 @@ process dedup{ if(params.singleEnd) { """ dedup -i $bam $treat_merged -o . -u + mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam samtools index -@ ${task.cpus} "$prefix".sorted.bam """ } else { """ dedup -i $bam $treat_merged -o . -u + mv *.log dedup.log samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam samtools index -@ ${task.cpus} "$prefix".sorted.bam """ @@ -841,6 +843,7 @@ process multiqc { file multiqc_config file ('fastqc/*') from ch_fastqc_results.collect().ifEmpty([]) file ('software_versions/*') from software_versions_yaml.collect().ifEmpty([]) + file ('adapter_removal/*') from ch_adapterremoval_logs.collect().ifEmpty([]) file ('idxstats/*') from ch_idxstats_for_multiqc.collect().ifEmpty([]) file ('preseq/*') from ch_preseq_results.collect().ifEmpty([]) file ('damageprofiler/*') from ch_damageprofiler_results.collect().ifEmpty([]) From 8add52c8e4b977d897d88c792c13dca4d378ec61 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 16 Oct 2018 21:20:53 +0200 Subject: [PATCH 02/11] Started implementing CircularMapper process - and also documented it :-) --- .travis.yml | 2 ++ docs/usage.md | 21 +++++++++++++++++++++ main.nf | 43 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8af58bc58..6a1528c9a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,4 +40,6 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --complexity_filter # Test BAM Trimming - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --trim_bam + # Test running with CircularMapper + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --circularmapper --circulartarget 'NC_007596.2' diff --git a/docs/usage.md b/docs/usage.md index 9919f8b75..f767a8852 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -355,6 +355,8 @@ Sets the minimum overlap between two reads when read merging is performed. Defau ## Read Mapping Parameters +## BWA (default) + These parameters configure mapping algorithm parameters. ### `--bwaalnn` @@ -369,6 +371,25 @@ Configures the `bwa aln -k` parameter for the seeding phase in the mapping algor Configures the length of the seed used in `bwa aln -l`. Default is set to BWA default of `32`. +## CircularMapper + +### `--circularmapper` + +This turns on the CircularMapper application, that enhances the mapping procedure with the BWA algorithm on circular references utilizing a extend-remap procedure (see Peltzer et al 2016, Genome Biology for details). + +### `--circularextension` + +The number of bases to extend the reference genome with. By default this is set to `500` if not specified otherwise. + +### `--circulartarget` + +The chromosome in your FastA reference that you'd like to be treated as circular. By default this is set to `MT` but can be configured to match any other chromosome. + +### `--circularfilter` + +If you want to filter out reads that don't map to a circular chromosome, turn this on. By default this option is turned off. + + ## Read Filtering and Conversion Parameters Users can configure to keep/discard/extract certain groups of reads efficiently in the nf-core/eager pipeline. diff --git a/main.nf b/main.nf index 10eaadc6e..2172ff39d 100644 --- a/main.nf +++ b/main.nf @@ -120,6 +120,12 @@ params.bwaalnn = 0.04 params.bwaalnk = 2 params.bwaalnl = 32 +//Mapper to use, by default BWA aln will be used +params.circularmapper = false +params.circularextension = 500 +params.circulartarget = 'MT' +params.circularfilter = false + //BAM Filtering steps (default = keep mapped and unmapped in BAM file) params.bam_keep_mapped_only = false params.bam_keep_all = true @@ -160,7 +166,7 @@ wherearemyfiles = file("$baseDir/assets/where_are_my_files.txt") // Validate inputs Channel.fromPath("${params.fasta}") .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta or --bwa_index"} - .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools} + .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper} //AWSBatch sanity checking @@ -444,7 +450,7 @@ process adapter_removal { set val(name), file(reads) from ( params.complexity_filter ? ch_clipped_reads_complexity_filtered : ch_read_files_clip ) output: - file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc) + file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper) file "*.settings" into ch_adapterremoval_logs script: @@ -489,9 +495,6 @@ process fastqc_after_clipping { """ } -//Close that channel -ch_fastqc_after_clipping.close() - /* Step 3: Mapping with BWA, SAM to BAM, Sort BAM */ @@ -500,6 +503,8 @@ process bwa { tag "$prefix" publishDir "${params.outdir}/mapping/bwa", mode: 'copy' + when: !params.circularmapper + input: file(reads) from ch_clipped_reads file "*" from ch_bwa_index @@ -519,8 +524,34 @@ process bwa { """ } -//TODO Multi Lane BAM merging here (!) +process circularmapper{ + tag "$prefix" + publishDir "${params.outdir}/mapping/circularmapper", mode: 'copy' + + when: params.circularmapper + input: + file(reads) from ch_clipped_reads_circularmapper + file fasta from ch_fasta_for_circularmapper + + output: + file "*.sorted.bam" into ch_mapped_reads_idxstats,ch_mapped_reads_filter,ch_mapped_reads_preseq, ch_mapped_reads_damageprofiler + file "*.bai" + + script: + filter = ${params.circularfilter} ? '-f true -x false' : '' + prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + stem_realigned = reads[0].toString()+"_realigned.bam" + """ + circulargenerator -e ${params.circularextension} -i $fasta -s ${params.circulartarget} + bwa index "*_${params.circularextension}.fasta" + bwa aln -t ${task.cpus} "${fasta.baseName}_${params.circularextension}.fasta" $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" + bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out + realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter + samtools sort -@ ${task.cpus} -O bam ${stem_realigned} > "${prefix}".sorted.bam + samtools index -@ ${task.cpus} "${prefix}".sorted.bam + """ +} /* * Step 4 - IDXStats From 9cd9021d5bc6a888e4cd1a418bfc355ca068f797 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 16 Oct 2018 21:39:05 +0200 Subject: [PATCH 03/11] Some more basic work on CircularMapper --- main.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 2172ff39d..27243a44a 100644 --- a/main.nf +++ b/main.nf @@ -531,20 +531,20 @@ process circularmapper{ when: params.circularmapper input: - file(reads) from ch_clipped_reads_circularmapper + file reads from ch_clipped_reads_circularmapper file fasta from ch_fasta_for_circularmapper output: - file "*.sorted.bam" into ch_mapped_reads_idxstats,ch_mapped_reads_filter,ch_mapped_reads_preseq, ch_mapped_reads_damageprofiler + file "*.sorted.bam" into ch_mapped_reads_idxstats_cm,ch_mapped_reads_filter_cm,ch_mapped_reads_preseq_cm, ch_mapped_reads_damageprofiler_cm file "*.bai" script: - filter = ${params.circularfilter} ? '-f true -x false' : '' + filter = "${params.circularfilter}" ? '' : '-f true -x false' prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ stem_realigned = reads[0].toString()+"_realigned.bam" """ circulargenerator -e ${params.circularextension} -i $fasta -s ${params.circulartarget} - bwa index "*_${params.circularextension}.fasta" + bwa index "${fasta.baseName}_${params.circularextension}.fasta" bwa aln -t ${task.cpus} "${fasta.baseName}_${params.circularextension}.fasta" $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter @@ -562,7 +562,7 @@ process samtools_idxstats { publishDir "${params.outdir}/samtools/stats", mode: 'copy' input: - file(bam) from ch_mapped_reads_idxstats + file(bam) from ch_mapped_reads_idxstats.mix(ch_mapped_reads_idxstats_cm) output: file "*.stats" into ch_idxstats_for_multiqc @@ -590,7 +590,7 @@ process samtools_filter { } input: - file bam from ch_mapped_reads_filter + file bam from ch_mapped_reads_filter.mix(ch_mapped_reads_filter_cm) output: file "*filtered.bam" into ch_bam_filtered_qualimap, ch_bam_filtered_dedup, ch_bam_filtered_markdup, ch_bam_filtered_pmdtools, ch_bam_filtered_angsd, ch_bam_filtered_gatk @@ -671,7 +671,7 @@ process preseq { !params.skip_preseq input: - file input from (params.skip_deduplication ? ch_mapped_reads_preseq : ch_hist_for_preseq ) + file input from (params.skip_deduplication ? ch_mapped_reads_preseq.mix(ch_mapped_reads_preseq_cm) : ch_hist_for_preseq ) output: file "${input.baseName}.ccurve" into ch_preseq_results @@ -701,7 +701,7 @@ process damageprofiler { !params.skip_damage_calculation input: - file bam from ch_mapped_reads_damageprofiler + file bam from ch_mapped_reads_damageprofiler.mix(ch_mapped_reads_damageprofiler_cm) file fasta from ch_fasta_for_damageprofiler output: From 309997f722fc19fe42a6ec81101d8aa87679573d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 16 Oct 2018 21:47:52 +0200 Subject: [PATCH 04/11] CircularMapper now functional --- main.nf | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 27243a44a..c497938e6 100644 --- a/main.nf +++ b/main.nf @@ -166,7 +166,7 @@ wherearemyfiles = file("$baseDir/assets/where_are_my_files.txt") // Validate inputs Channel.fromPath("${params.fasta}") .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta or --bwa_index"} - .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper} + .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index} //AWSBatch sanity checking @@ -524,6 +524,31 @@ process bwa { """ } +process circulargenerator{ + tag "$prefix" + publishDir "${params.outdir}/reference_genome/circularmapper_index", mode: 'copy', saveAs: { filename -> + if (params.saveReference) filename + else if(!params.saveReference && filename == "where_are_my_files.txt") filename + else null + } + + when: params.circularmapper + + input: + file fasta from ch_fasta_for_circularmapper_index + + output: + file "*.fasta*" into ch_circularmapper_indices + + script: + """ + circulargenerator -e ${params.circularextension} -i $fasta -s ${params.circulartarget} + bwa index "${fasta.baseName}_${params.circularextension}.fasta" + """ + +} + + process circularmapper{ tag "$prefix" publishDir "${params.outdir}/mapping/circularmapper", mode: 'copy' @@ -533,6 +558,7 @@ process circularmapper{ input: file reads from ch_clipped_reads_circularmapper file fasta from ch_fasta_for_circularmapper + file "*" from ch_circularmapper_indices output: file "*.sorted.bam" into ch_mapped_reads_idxstats_cm,ch_mapped_reads_filter_cm,ch_mapped_reads_preseq_cm, ch_mapped_reads_damageprofiler_cm @@ -541,14 +567,11 @@ process circularmapper{ script: filter = "${params.circularfilter}" ? '' : '-f true -x false' prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ - stem_realigned = reads[0].toString()+"_realigned.bam" """ - circulargenerator -e ${params.circularextension} -i $fasta -s ${params.circulartarget} - bwa index "${fasta.baseName}_${params.circularextension}.fasta" bwa aln -t ${task.cpus} "${fasta.baseName}_${params.circularextension}.fasta" $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai" - bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out + bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" "${fasta.baseName}_${params.circularextension}.fasta" "${reads.baseName}".sai $reads > tmp.out realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter - samtools sort -@ ${task.cpus} -O bam ${stem_realigned} > "${prefix}".sorted.bam + samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam samtools index -@ ${task.cpus} "${prefix}".sorted.bam """ } From 2d7723c95d7982e9e6920be6e82039946e915c37 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Oct 2018 21:06:04 +0200 Subject: [PATCH 05/11] Typo fix --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index de5cc37d9..81e88a047 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,7 @@ // Global default params, used in configs params { - pipelineVersion = '2.0dev' // Pipeline version + pipelineVersion = '2.0.0dev' // Pipeline version container = 'nfcore/eager:latest' //Pipeline options From 936b83bf4d454c0906a6ab4bd925e7e7b706eed5 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Oct 2018 21:25:16 +0200 Subject: [PATCH 06/11] Support for BWA Mem + docs + tests --- .travis.yml | 3 ++- docs/usage.md | 5 +++++ main.nf | 44 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6a1528c9a..6e730aea0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,4 +42,5 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --trim_bam # Test running with CircularMapper - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --circularmapper --circulartarget 'NC_007596.2' - + # Test running with BWA Mem + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --bwamem diff --git a/docs/usage.md b/docs/usage.md index f767a8852..8f0036eb4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -389,6 +389,11 @@ The chromosome in your FastA reference that you'd like to be treated as circular If you want to filter out reads that don't map to a circular chromosome, turn this on. By default this option is turned off. +## BWA Mem + +### `--bwamem` + +Turn this on to utilize BWA Mem instead of `bwa aln` for alignment. Can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. ## Read Filtering and Conversion Parameters diff --git a/main.nf b/main.nf index c497938e6..6d13ea188 100644 --- a/main.nf +++ b/main.nf @@ -126,6 +126,10 @@ params.circularextension = 500 params.circulartarget = 'MT' params.circularfilter = false +//BWAMem Specific Settings +params.bwamem = false +params.bwamem = + //BAM Filtering steps (default = keep mapped and unmapped in BAM file) params.bam_keep_mapped_only = false params.bam_keep_all = true @@ -166,7 +170,7 @@ wherearemyfiles = file("$baseDir/assets/where_are_my_files.txt") // Validate inputs Channel.fromPath("${params.fasta}") .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta or --bwa_index"} - .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index} + .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping} //AWSBatch sanity checking @@ -317,7 +321,7 @@ process makeBWAIndex { file wherearemyfiles output: - file "*.{amb,ann,bwt,pac,sa,fasta,fa}" into ch_bwa_index + file "*.{amb,ann,bwt,pac,sa,fasta,fa}" into (ch_bwa_index,ch_bwa_index_bwamem) file "where_are_my_files.txt" script: @@ -450,7 +454,7 @@ process adapter_removal { set val(name), file(reads) from ( params.complexity_filter ? ch_clipped_reads_complexity_filtered : ch_read_files_clip ) output: - file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper) + file "*.combined*.gz" into (ch_clipped_reads, ch_clipped_reads_for_fastqc,ch_clipped_reads_circularmapper,ch_clipped_reads_bwamem) file "*.settings" into ch_adapterremoval_logs script: @@ -503,7 +507,7 @@ process bwa { tag "$prefix" publishDir "${params.outdir}/mapping/bwa", mode: 'copy' - when: !params.circularmapper + when: !params.circularmapper && !params.bwamem input: file(reads) from ch_clipped_reads @@ -576,6 +580,30 @@ process circularmapper{ """ } +process bwamem { + tag "$prefix" + publishDir "${params.outdir}/mapping/bwamem", mode: 'copy' + + when: params.bwamem && !params.circularmapper + + input: + file(reads) from ch_clipped_reads_bwamem + file "*" from ch_bwa_index_bwamem + file fasta from ch_fasta_for_bwamem_mapping + + output: + file "*.sorted.bam" into ch_bwamem_mapped_reads_idxstats,ch_bwamem_mapped_reads_filter,ch_bwamem_mapped_reads_preseq, ch_bwamem_mapped_reads_damageprofiler + file "*.bai" + + + script: + prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + """ + bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam + samtools index -@ ${task.cpus} "${prefix}".sorted.bam + """ +} + /* * Step 4 - IDXStats */ @@ -585,7 +613,7 @@ process samtools_idxstats { publishDir "${params.outdir}/samtools/stats", mode: 'copy' input: - file(bam) from ch_mapped_reads_idxstats.mix(ch_mapped_reads_idxstats_cm) + file(bam) from ch_mapped_reads_idxstats.mix(ch_mapped_reads_idxstats_cm,ch_bwamem_mapped_reads_idxstats) output: file "*.stats" into ch_idxstats_for_multiqc @@ -613,7 +641,7 @@ process samtools_filter { } input: - file bam from ch_mapped_reads_filter.mix(ch_mapped_reads_filter_cm) + file bam from ch_mapped_reads_filter.mix(ch_mapped_reads_filter_cm,ch_bwamem_mapped_reads_filter) output: file "*filtered.bam" into ch_bam_filtered_qualimap, ch_bam_filtered_dedup, ch_bam_filtered_markdup, ch_bam_filtered_pmdtools, ch_bam_filtered_angsd, ch_bam_filtered_gatk @@ -694,7 +722,7 @@ process preseq { !params.skip_preseq input: - file input from (params.skip_deduplication ? ch_mapped_reads_preseq.mix(ch_mapped_reads_preseq_cm) : ch_hist_for_preseq ) + file input from (params.skip_deduplication ? ch_mapped_reads_preseq.mix(ch_mapped_reads_preseq_cm,ch_bwamem_mapped_reads_preseq) : ch_hist_for_preseq ) output: file "${input.baseName}.ccurve" into ch_preseq_results @@ -724,7 +752,7 @@ process damageprofiler { !params.skip_damage_calculation input: - file bam from ch_mapped_reads_damageprofiler.mix(ch_mapped_reads_damageprofiler_cm) + file bam from ch_mapped_reads_damageprofiler.mix(ch_mapped_reads_damageprofiler_cm,ch_bwamem_mapped_reads_damageprofiler) file fasta from ch_fasta_for_damageprofiler output: From 4423ff8df8b3a4cf6d6397088b675a6556be2db7 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Oct 2018 21:39:05 +0200 Subject: [PATCH 07/11] Require either --singleEnd or --pairedEnd --- .travis.yml | 10 +++++----- docs/usage.md | 11 ++++++++++- main.nf | 12 ++++++++++-- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6e730aea0..8d16d3178 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,14 +33,14 @@ script: # Lint the pipeline code - nf-core lint ${TRAVIS_BUILD_DIR} # Run the basic pipeline with the test profile - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd # Run the basic pipeline with single end data (pretending its single end actually) - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --singleEnd # Run the same pipeline testing optional step: fastp, complexity - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --complexity_filter + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --complexity_filter # Test BAM Trimming - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --trim_bam + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --trim_bam # Test running with CircularMapper - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --circularmapper --circulartarget 'NC_007596.2' + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --circularmapper --circulartarget 'NC_007596.2' # Test running with BWA Mem - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --bwamem + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem diff --git a/docs/usage.md b/docs/usage.md index 8f0036eb4..0e54f9ecf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -136,7 +136,7 @@ Please note the following requirements: If left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz` ### `--singleEnd` -By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--singleEnd` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--reads`. For example: +If you have single-end data, you need to specify `--singleEnd` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--reads`. For example: ```bash --singleEnd --reads '*.fastq' @@ -144,6 +144,15 @@ By default, the pipeline expects paired-end data. If you have single-end data, y It is not possible to run a mixture of single-end and paired-end files in one run. +### `--pairedEnd` +If you have paired-end data, you need to specify `--pairedEnd` on the command line when you launc hthe pipeline. + +A normal glob pattern, enclosed in quotation marks, can then be used for `--reads`. For example: + +```bash +--pairedEnd --reads '*.fastq' +``` + ## Reference Genomes The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. diff --git a/main.nf b/main.nf index 6d13ea188..d9254f29d 100644 --- a/main.nf +++ b/main.nf @@ -83,6 +83,7 @@ if (params.help){ // Configurable variables params.name = false params.singleEnd = false +params.pairedEnd = false params.genome = "Custom" params.snpcapture = false params.bedfile = '' @@ -128,7 +129,6 @@ params.circularfilter = false //BWAMem Specific Settings params.bwamem = false -params.bwamem = //BAM Filtering steps (default = keep mapped and unmapped in BAM file) params.bam_keep_mapped_only = false @@ -172,13 +172,21 @@ Channel.fromPath("${params.fasta}") .ifEmpty { exit 1, "No genome specified! Please specify one with --fasta or --bwa_index"} .into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_bwa_mapping; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper; ch_fasta_for_circularmapper_index;ch_fasta_for_bwamem_mapping} -//AWSBatch sanity checking +//Validate that either pairedEnd or singleEnd has been specified by the user! +if( params.singleEnd || params.pairedEnd ){ +} else { + exit 1, "Please specify either --singleEnd or --pairedEnd to execute the pipeline!" +} + +//AWSBatch sanity checking if(workflow.profile == 'awsbatch'){ if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!" } + + // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name From 2530d8d78856e3b4691aad489bb8cc865c23dfee Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Oct 2018 21:54:35 +0200 Subject: [PATCH 08/11] Add better usage description --- docs/usage.md | 3 ++- main.nf | 44 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 8f0036eb4..5a03304ec 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -327,6 +327,7 @@ Turns off duplicate removal methods DeDup and MarkDuplicates respectively. No du Performs a poly-G complexity filtering step in the beginning of the pipeline if turne on. This can be useful for especially assembly projects where low-complexity regions might dramatically influence the assembly of contigs. ## Complexity Filtering Options + ### `--complexity_filter_poly_g_min` This option can be used to define the minimum value for the poly-G filtering step in low complexity filtering. By default, this is set to a value of `10` unless the user has chosen something specifically using this option. @@ -401,7 +402,7 @@ Users can configure to keep/discard/extract certain groups of reads efficiently ### `--bam_keep_mapped_only` -This can be used to only keep mapped reads for downstream analysis. By default turned off, all reads are kept in the BAM file. +This can be used to only keep mapped reads for downstream analysis. By default turned off, all reads are kept in the BAM file. Unmapped reads are stored both in BAM and FastQ format e.g. for different downstream processing. ### `--bam_keep_all` diff --git a/main.nf b/main.nf index 6d13ea188..1599e4e19 100644 --- a/main.nf +++ b/main.nf @@ -59,8 +59,49 @@ def helpMessage() { --clip_readlength Specify read minimum length to be kept for downstream analysis --clip_min_read_quality Specify minimum base quality for not trimming off bases --min_adap_overlap Specify minimum adapter overlap + + BWA Mapping + --bwaalnn Specify the -n parameter for BWA aln + --bwaalnk Specify the -k parameter for BWA aln + --bwaalnl Specify the -l parameter for BWA aln + + CircularMapper + --circularmapper Turn on CircularMapper (CM) + --circularextension Specify the number of bases to extend + --circulartarget Specify the target chromosome for CM + --circularfilter Specify to filter off-target reads + + BWA Mem Mapping + --bwamem Turn on BWA Mem instead of CM/BWA aln for mapping + + BAM Filtering + --bam_keep_mapped_only Only consider mapped reads for downstream analysis. Unmapped reads are extracted to separate output. + --bam_filter_reads Keep all reads in BAM file for downstream analysis + --bam_mapping_quality_threshold Minimum mapping quality for reads filter + + DeDuplication + --dedupper Deduplication method to use + --dedup_all_merged Treat all reads as merged reads + + Library Complexity Estimation + --preseq_step_size Specify the step size of Preseq + + (aDNA) Damage Analysis + --damageprofiler_length Specify length filter for DamageProfiler + --damageprofiler_threshold Specify number of bases to consider for damageProfiler + --run_pmdtools Turn on PMDtools + --pmdtools_range Specify range of bases for PMDTools + --pmdtools_threshold Specify PMDScore threshold for PMDTools + --pmdtools_reference_mask Specify a reference mask for PMDTools + --pmdtools_max_reads Specify the max. number of reads to consider for metrics generation + + BAM Trimming + --trim_bam Turn on BAM trimming for UDG(+ or 1/2) protocols + --bamutils_clip_left / --bamutils_clip_right Specify the number of bases to clip off reads + --bamutils_softclip Use softclip instead of hard masking + - For a full list of available parameters, consider the documentation. + For a full list and more information of available parameters, consider the documentation. Other options: @@ -128,7 +169,6 @@ params.circularfilter = false //BWAMem Specific Settings params.bwamem = false -params.bwamem = //BAM Filtering steps (default = keep mapped and unmapped in BAM file) params.bam_keep_mapped_only = false From 2083ed4ff0ee84e9408d94cce55d5023fecfa239 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Oct 2018 21:58:16 +0200 Subject: [PATCH 09/11] Minor readme updates --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 711a09e01..eed6776f7 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,14 @@ The pipeline uses [Nextflow](https://www.nextflow.io), a bioinformatics workflow * Sequence Dictionary * QC with FastQC * AdapterRemoval for read clipping and merging -* Read mapping with BWA +* Read mapping with BWA, BWA Mem or CircularMapper * Samtools sort, index, stats & conversion to BAM -* DeDup read deduplication / MarkDuplicates +* DeDup or MarkDuplicates read deduplication * QualiMap BAM QC Checking -* Preseq estimation +* Preseq Library Complexity Estimation * DamageProfiler damage profiling -* PMDTools damagge filtering / assessment +* BAM Clipping for UDG+/UDGhalf protocols +* PMDTools damage filtering / assessment ### Documentation The nf-core/eager pipeline comes with documentation about the pipeline, found in the `docs/` directory: From aacbe83843dea60f0b7cf5856c74f2c0e7a1cabd Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Oct 2018 22:55:02 +0200 Subject: [PATCH 10/11] Prepare for release 2.0.0 --- .travis.yml | 2 +- Dockerfile | 2 +- Singularity | 4 ++-- environment.yml | 2 +- nextflow.config | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8d16d3178..7b5cce20d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/eager # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/eager nfcore/eager:latest + - docker tag nfcore/eager nfcore/eager:2.0.0 install: # Install Nextflow diff --git a/Dockerfile b/Dockerfile index 3609575ec..cffd879a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,4 +3,4 @@ FROM nfcore/base LABEL description="Docker image containing all requirements for nf-core/eager pipeline" COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-eager-2.0.0dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-eager-2.0.0/bin:$PATH diff --git a/Singularity b/Singularity index 794aad31f..a2da2cb7d 100644 --- a/Singularity +++ b/Singularity @@ -4,10 +4,10 @@ Bootstrap:docker %labels MAINTAINER Alexander Peltzer DESCRIPTION Container image containing all requirements for the nf-core/eager pipeline - VERSION 2.0.0dev + VERSION 2.0.0 %environment - PATH=/opt/conda/envs/nf-core-eager-2.0.0dev/bin:$PATH + PATH=/opt/conda/envs/nf-core-eager-2.0.0/bin:$PATH export PATH %files diff --git a/environment.yml b/environment.yml index ba56253ec..ad22d3990 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: nf-core-eager-2.0.0dev +name: nf-core-eager-2.0.0 channels: - defaults - conda-forge diff --git a/nextflow.config b/nextflow.config index 81e88a047..928e1a34f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,7 @@ // Global default params, used in configs params { pipelineVersion = '2.0.0dev' // Pipeline version - container = 'nfcore/eager:latest' + container = 'nfcore/eager:2.0.0' //Pipeline options aligner = 'bwa' @@ -100,7 +100,7 @@ manifest { name = 'nf-core/eager' author = 'Alexander Peltzer, Stephen Clayton, James A Fellows-Yates' homePage = 'https://github.com/nf-core/eager' - version = '2.0.0dev' + version = '2.0.0' description = 'A fully reproducible and modern ancient DNA pipeline in Nextflow and with cloud support.' mainScript = 'main.nf' nextflowVersion = '>=0.32.0' From 47081dee06fcbfcfeea9154ea22fbc08fa62afce Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Oct 2018 22:56:37 +0200 Subject: [PATCH 11/11] Update Changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee9e1087d..dbee1c282 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,13 @@ # nf-core/eager: Changelog -## 2.0 "Kaufbeuren" - 2018-10-14 +## 2.0 "Kaufbeuren" - 2018-10-17 Initial release of nf-core/eager featuring: * FastQC read quality control * (Optional) Read complexity filtering with FastP * Read merging and clipping using AdapterRemoval v2 -* Mapping using BWA +* Mapping using BWA / BWA Mem or CircularMapper * Library Complexity Estimation with Preseq * Conversion and Filtering of BAM files using Samtools * Damage assessment via DamageProfiler, additional filtering using PMDTools