diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c7a9c2a5..b9827c75d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: - "-profile test,docker --preprocessing_tool adapterremoval --preprocessing_adapterlist 'https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/adapterremoval/adapterremoval_adapterlist.txt' --sequencing_qc_tool falco --run_genotyping --genotyping_tool 'freebayes' --genotyping_source 'raw'" - "-profile test,docker --mapping_tool bwamem --run_mapdamage_rescaling --run_pmd_filtering --run_trim_bam --run_genotyping --genotyping_tool 'ug' --genotyping_source 'trimmed'" - "-profile test,docker --mapping_tool bowtie2 --damagecalculation_tool mapdamage --damagecalculation_mapdamage_downsample 100 --run_genotyping --genotyping_tool 'hc' --genotyping_source 'raw'" - - "-profile test,docker --skip_preprocessing --convert_inputbam" + - "-profile test,docker --mapping_tool circularmapper --skip_preprocessing --convert_inputbam --fasta_circular_target 'NC_007596.2' --fasta_circularmapper_elongationfactor 500" - "-profile test_humanbam,docker --run_mtnucratio --run_contamination_estimation_angsd --snpcapture_bed 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' --run_genotyping --genotyping_tool 'pileupcaller' --genotyping_source 'raw'" - "-profile test_humanbam,docker --run_sexdeterrmine --run_genotyping --genotyping_tool 'angsd' --genotyping_source 'raw'" - "-profile test_multiref,docker" ## TODO add damage manipulation here instead once it goes multiref diff --git a/CITATIONS.md b/CITATIONS.md index 3715b56a8..8c742dc34 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -126,6 +126,10 @@ > Sex.DetERRmine.py Lamnidis, T.C. et al., 2018. Ancient Fennoscandian genomes reveal origin and spread of Siberian ancestry in Europe. Nature communications, 9(1), p.5018. Available at: http://dx.doi.org/10.1038/s41467-018-07483-5. Download: https://github.com/TCLamnidis/Sex.DetERRmine + - [CircularMapper](https://doi.org/10.1186/s13059-016-0918-z) + + > Peltzer, A., Jäger, G., Herbig, A., Seitz, A., Kniep, C., Krause, J., & Nieselt, K. (2016). EAGER: efficient ancient genome reconstruction. Genome Biology, 17(1), 1–14. doi: [10.1186/s13059-016-0918-z](https://doi.org/10.1186/s13059-016-0918-z) + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/assets/schema_fasta.json b/assets/schema_fasta.json index d53bca776..54e00de8e 100644 --- a/assets/schema_fasta.json +++ b/assets/schema_fasta.json @@ -46,7 +46,21 @@ "circular_target": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "The headers of the chromosome to be extended by circularmapper must not contain any spaces and no leading '>'." + "errorMessage": "The headers of the chromosome extended by circulargenerator must not contain any spaces and no leading '>'." + }, + "circularmapper_elongatedfasta": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.f(na|asta|a|as)(\\.gz)?$", + "exists": true, + "errorMessage": "The elongated Fasta files for the mapping reference must be provided with file extensions '.fasta', '.fa', '.fas', '.fna', '.fasta.gz','.fa.gz','.fas.gz', '.fna.gz' and cannot contain any spaces." + }, + "circularmapper_elongatedindex": { + "type": "string", + "format": "directory-path", + "pattern": "^\\S+$", + "exists": true, + "errorMessage": "The directories of the index files for the elongated mapping reference for circularmapper must not contain any spaces and have file extensions ''." }, "mitochondrion_header": { "type": "string", diff --git a/conf/modules.config b/conf/modules.config index 37c5d666b..fc0bf9b1f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -278,6 +278,35 @@ process { ] } + // Reference elongation and indexing for circular mapping + withName: GUNZIP_ELONGATED_FASTA { + publishDir = [ + path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, + mode: params.publish_dir_mode, + pattern: '*_*[0-9].f*', + enabled: params.save_reference + ] + } + + withName: CIRCULARMAPPER_CIRCULARGENERATOR { + tag = { "${meta.id}_${params.fasta_circularmapper_elongationfactor}" } + publishDir = [ + path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, + mode: params.publish_dir_mode, + pattern: '*_*[0-9].fasta', + enabled: params.save_reference + ] + } + + withName: BWA_INDEX_CIRCULARISED { + publishDir = [ + path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, + mode: params.publish_dir_mode, + pattern: 'bwa', + enabled: params.save_reference + ] + } + // // BAM INPUT // @@ -294,6 +323,9 @@ process { withName: SAMTOOLS_INDEX_BAM_INPUT { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + publishDir = [ + enabled: false + ] } // @@ -404,7 +436,7 @@ process { withName: BWA_ALN { tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.id_index}" } publishDir = [ enabled: false ] @@ -417,7 +449,7 @@ process { [ "-r '@RG\\tID:ILLUMINA-${meta.sample_id}_${meta.library_id}\\tSM:${meta.sample_id}\\tLB:${meta.library_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}_stranded-${se_pe_string}'" ].join(' ').trim() } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.id_index}" } publishDir = [ enabled: false ] @@ -502,7 +534,7 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - path: { "${params.outdir}/mapping/" }, + path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, mode: params.publish_dir_mode, pattern: '*.{bam}' ] @@ -513,22 +545,52 @@ process { ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/mapping/" }, + path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}' ] } - withName: SAMTOOLS_FLAGSTAT_MAPPED { + withName: SAMTOOLS_FLAGSTAT_MERGED_LANES { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - path: { "${params.outdir}/mapping/" }, + path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, mode: params.publish_dir_mode, pattern: '*.flagstat' ] } + // Circular mapping + // Configuration for BWA_ALN and BWA_SAMSE/SAMPE is the same as for the non-circular mapping + withName: CIRCULARMAPPER_REALIGNSAMFILE { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.mapping_circularmapper_circularfilter ? "-f true -x true" : "" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + publishDir = [ + enabled: false + ] + } + + withName: ".*MAP:CIRCULARMAPPER:FASTQ_ALIGN_BWAALN_ELONGATED:SAMTOOLS_INDEX" { + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? "-c" : "" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + publishDir = [ + enabled: false + ] + } + + withName: ".*MAP:CIRCULARMAPPER:SAMTOOLS_INDEX_REALIGNED" { + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? "-c" : "" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + publishDir = [ + enabled: false + ] + + } + // // DEDUPLICATION // diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index eea6cab14..b1b2e3996 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -281,6 +281,38 @@ nextflow run ../main.nf -profile test,singularity --outdir ./results -resume -du ``` +### CircularMapper + +```bash +## CircularMapper with reference elongation +## Expect: Reference elongation is ran, and circularmapper SWF is ran. +## Check: Expect the elongated reference and BWA index directory within the `reference` directory. Also 2 bam files together with their BAIs and Flagstats in the `mapping/circularmapper` directory. +nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --save_reference +``` + +```bash +## CircularMapper with an already elongated reference. Big reference flag. Also check that bwa_aln flags also propagate when using circularmapper. +## Expect: Reference elongation is NOT ran, and circularmapper SWF is ran. +## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. +## Also check the .command.sh for the -k and -n flags during BWA ALN. +nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --fasta_circularmapper_elongatedfasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --fasta_circularmapper_elongatedindex data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 +``` + +```bash +## Multiref with circularmapper. reference_sheet_multiref.csv edited to include elongated reference and index from first CM manual test for Mammoth_MT. +## Expect: No elongation for Mammoth MT. Elongation for hs37d5_chr21-MT reference. +## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory PER REFERENCE (3 libraries (from 2 samples) x 2 references x 3 files = 18 files total). +## Also, elongated hs37d5_chr21-MT is not saved, since --save_reference was not specified. But it did get elongated. +nextflow run main.nf -profile test_multiref,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_sheet /Users/lamnidis/Software/github/jbv2/eager/data/reference/reference_sheet_multiref.csv --mapping_tool circularmapper --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 +``` + +```bash +## Circularmapper with circularfilter, with a provided elongated reference. +## Expect: No elongation for Mammoth MT. +## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. (6 files total). Ensure files have the @SQ tag of the circular choromosome. +nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --fasta_circularmapper_elongatedfasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --fasta_circularmapper_elongatedindex data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 --mapping_circularmapper_circularfilter +``` + ## Host Removal All possible parameters diff --git a/docs/output.md b/docs/output.md index 9237f0c55..e22b29149 100644 --- a/docs/output.md +++ b/docs/output.md @@ -95,6 +95,25 @@ Depending on what is supplied by the user, and if `--save_reference` is supplied It is highly recommend to move these files to a central location or cache directory on your machine to facilitate resume of the indices across different pipeline runs. In many cases indexing the reference genome for alignment can be the longest step of a pipeline run, therefore re-using indices in future runs (supplied to the pipeline with flags such as `--fasta_fai`, `--fasta_dict`, etc. or added to the reference sheet provided to `--fasta`) can greatly speed up analyses on other samples. +#### Reference Elongation + +
+Output files + +- `reference/` + - `_/` + - `*.{fasta,fna,fa,fa}`: Uncompressed input FASTA file (if supplied to pipeline gzipped). + - `bwa/`: + - `*.fasta.{amb,ann,bwt,pac,sa}`: BWA aligner(s) reference index files from `bwa index`. + +
+ +Mapping with `circularmapper` requires an elongated reference built by [CircularMapper/CircularGenerator](https://github.com/apeltzer/CircularMapper). CircularGenerator elongates the `--fasta_circular_target` of a supplied reference genome fasta by the number of base pairs specified in `--fasta_circularmapper_elongationfactor`. + +Depending on what is supplied by the user, and if `--save_reference` is supplied, this directory will contain the elongated reference fasta, as well as its corresponding bwa reference index files. + +It is highly recommend to move these files to a central location or cache directory on your machine to facilitate resume of the indices across different pipeline runs. In many cases indexing the reference genome for alignment can be the longest step of a pipeline run, therefore re-using indices in future runs (supplied to the pipeline with flags such as `--fasta_circularmapper_elongatedfasta`, `--fasta_circularmapper_elongatedindex`, etc. or added to the reference sheet provided to `--fasta`) can greatly speed up analyses on other samples. + ### Preprocessing #### Falco @@ -161,7 +180,7 @@ The resulting FASTQ files will only be present in your results directory if you
Output files -- `mapping/` +- `mapping/bwa{aln,mem}/` - `*.bam`: Sorted reads aligned against a reference genome in BAM format with no additional filtering. - `*.{bai,csi}`: Index file corresponding to a BAM file which is for faster downstream steps (e.g. SAMtools). @@ -176,7 +195,7 @@ The resulting FASTQ files will only be present in your results directory if you
Output files -- `mapping/` +- `mapping/bowtie2/` - `*.bam`: Sorted reads aligned against a reference genome in BAM format with no additional filtering. - `*.{bai,csi}`: Index file corresponding to a BAM file which is for faster downstream steps (e.g. SAMtools). @@ -186,6 +205,21 @@ The resulting FASTQ files will only be present in your results directory if you [Bowtie 2](https://bowtie-bio.sourceforge.net/bowtie2/manual.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an FM Index (based on the Burrows-Wheeler Transform or BWT) to keep its memory footprint small and supports gapped, local, and paired-end alignment modes. +#### CircularMapper + +
+Output files + +- `mapping/circularmapper/` + + - `*.bam`: Sorted reads aligned against an elongated reference genome in BAM format with no additional filtering. + - `*.{bai,csi}`: Index file corresponding to a BAM file which is for faster downstream steps (e.g. SAMtools). + - `*.flagstat`: Statistics of aligned reads from SAMtools `flagstat`. + +
+ +[CircularMapper RealignSAMFile](https://github.com/apeltzer/CircularMapper/tree/master) is an extension to `bwa aln` for realigning reads mapped to circularised contigs. First, an elogated/circularised reference is built using CircularGenerator, then reads are mapped to this reference using BWA ALN. The resulting BAM file is then realigned using CircularMapper RealignSAMFile. The reference coordinates of this BAM file have been adjusted to those of the original reference genome (prior to elongation). + ### Host Removal
diff --git a/modules.json b/modules.json index 288e85c09..a732a3cf9 100644 --- a/modules.json +++ b/modules.json @@ -95,6 +95,16 @@ "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", "installed_by": ["modules"] }, + "circularmapper/circulargenerator": { + "branch": "master", + "git_sha": "a7b0131370d9bc38076efad88773bca5537203d0", + "installed_by": ["modules"] + }, + "circularmapper/realignsamfile": { + "branch": "master", + "git_sha": "a7b0131370d9bc38076efad88773bca5537203d0", + "installed_by": ["modules"] + }, "damageprofiler": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/circularmapper/circulargenerator/environment.yml b/modules/nf-core/circularmapper/circulargenerator/environment.yml new file mode 100644 index 000000000..f1e1201ef --- /dev/null +++ b/modules/nf-core/circularmapper/circulargenerator/environment.yml @@ -0,0 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "circularmapper_circulargenerator" + +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::circularmapper=1.93.5 diff --git a/modules/nf-core/circularmapper/circulargenerator/main.nf b/modules/nf-core/circularmapper/circulargenerator/main.nf new file mode 100644 index 000000000..9463ec497 --- /dev/null +++ b/modules/nf-core/circularmapper/circulargenerator/main.nf @@ -0,0 +1,61 @@ +// This module does the following: +//creating a modified reference genome, with an elongation_factoration of the an specified amount of bases +process CIRCULARMAPPER_CIRCULARGENERATOR { + + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/circularmapper:1.93.5--h2a3209d_3': + 'biocontainers/circularmapper:1.93.5--h2a3209d_3' }" + + input: + tuple val(meta), path(reference) + tuple val(meta2), val(elongation_factor) + tuple val(meta3), val(target) + + output: + tuple val(meta), path("*_${elongation_factor}.fasta") , emit: fasta + tuple val(meta), path("*${elongation_factor}_elongated") , emit: elongated + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def full_extension = reference.getName().replaceFirst(reference.getSimpleName(), "") + """ + circulargenerator \ + -e ${elongation_factor} \ + -i ${reference} \ + -s ${target} \ + $args + + ## circulargenerator has a hardcoded output name. Rename if necessary to use prefix. + if [[ "${reference.getSimpleName()}_${elongation_factor}${full_extension}" != "${prefix}_${elongation_factor}.fasta" ]]; then + mv ${reference.getSimpleName()}_${elongation_factor}${full_extension} ${prefix}_${elongation_factor}.fasta + mv ${reference}_${elongation_factor}_elongated ${prefix}.fasta_${elongation_factor}_elongated + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + circulargenerator: \$(circulargenerator -h | grep 'usage' | sed 's/usage: CircularGenerator//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_${elongation_factor}.fasta + touch ${prefix}.fasta_${elongation_factor}_elongated + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + circulargenerator: \$(circulargenerator -h | grep 'usage' | sed 's/usage: CircularGenerator//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/circularmapper/circulargenerator/meta.yml b/modules/nf-core/circularmapper/circulargenerator/meta.yml new file mode 100644 index 000000000..baa39e74b --- /dev/null +++ b/modules/nf-core/circularmapper/circulargenerator/meta.yml @@ -0,0 +1,65 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "circularmapper_circulargenerator" +description: A method to improve mappings on circular genomes, using the BWA mapper. +keywords: + - sort + - example + - genomics +tools: + - "circulargenerator": + description: "Creating a modified reference genome, with an elongation of the an specified amount of bases" + homepage: "https://github.com/apeltzer/CircularMapper" + documentation: "https://github.com/apeltzer/CircularMapper/blob/master/docs/contents/userguide.rst" + tool_dev_url: "https://github.com/apeltzer/CircularMapper" + doi: "no DOI available" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'sample1' ]` + - reference: + type: file + description: Genome fasta file + pattern: "*.fasta" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'sample1' ]` + - elongation_factor: + type: integer + description: The number of bases that the ends of the target chromosome in the reference genome should be elongated by + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'sample1' ]` + - target: + type: string + description: The name of the chromosome in the reference genome that should be elongated +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Genome fasta file + pattern: "*.fasta" + - elongated: + type: file + description: File listing the chromosomes that were elongated + pattern: "*_elongated" +authors: + - "@apalleja" + - "@TCLamnidis" +maintainers: + - "" diff --git a/modules/nf-core/circularmapper/realignsamfile/environment.yml b/modules/nf-core/circularmapper/realignsamfile/environment.yml new file mode 100644 index 000000000..d9beb5ae1 --- /dev/null +++ b/modules/nf-core/circularmapper/realignsamfile/environment.yml @@ -0,0 +1,7 @@ +name: circularmapper_realignsamfile +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::circularmapper=1.93.5 diff --git a/modules/nf-core/circularmapper/realignsamfile/main.nf b/modules/nf-core/circularmapper/realignsamfile/main.nf new file mode 100644 index 000000000..6363b8d25 --- /dev/null +++ b/modules/nf-core/circularmapper/realignsamfile/main.nf @@ -0,0 +1,60 @@ +process CIRCULARMAPPER_REALIGNSAMFILE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/circularmapper:1.93.5--h4a94de4_1': + 'biocontainers/circularmapper:1.93.5--h4a94de4_1' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(fasta) + tuple val(meta3), val(elongation_factor) + tuple val(meta4), path(elongated_chr_list) + // NOTE: The elongated_chr_list is not used in the script, but is an implicit input that realignsamfile requires when using the `-f true` option. + // In its absence, when `-f true` is set, realignsamfile will remove all @SQ tags from the BAM header, breaking the bamfile. + + output: + tuple val(meta), path("*_realigned.bam") , emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.93.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + realignsamfile \\ + -Xmx${task.memory.toGiga()}g \\ + ${args} \\ + -e ${elongation_factor} \\ + -i ${bam} \\ + -r ${fasta} + + ## realignsamfile has a hardcoded output name. Rename if necessary to use prefix. + if [[ "${bam.getBaseName()}_realigned.bam" != "${prefix}_realigned.bam" ]]; then + mv ${bam.getBaseName()}_realigned.bam ${prefix}_realigned.bam + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CircularMapper: ${VERSION} + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.93.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}_realigned.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CircularMapper: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/circularmapper/realignsamfile/meta.yml b/modules/nf-core/circularmapper/realignsamfile/meta.yml new file mode 100644 index 000000000..94f74069e --- /dev/null +++ b/modules/nf-core/circularmapper/realignsamfile/meta.yml @@ -0,0 +1,72 @@ +name: "circularmapper_realignsamfile" +description: Realign reads mapped with BWA to elongated reference genome +keywords: + - realign + - circular + - map + - reference + - fasta + - bam + - short-read + - bwa +tools: + - "circularmapper": + description: "A method to improve mappings on circular genomes such as Mitochondria." + homepage: "https://circularmapper.readthedocs.io/en/latest/index.html" + documentation: "https://circularmapper.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/apeltzer/CircularMapper/" + doi: "10.1186/s13059-016-0918-z" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: BAM/SAM file + pattern: "*.{bam,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - fasta: + type: file + description: Input elongated genome fasta + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - elongation_factor: + type: integer + description: The elongation factor used when running circulargenerator, i.e. the number of bases that the ends of the target chromosome in the reference genome was elongated by + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - elongated_chr_list: + type: file + description: File listing the chromosomes that were elongated + pattern: "*_elongated" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: Realigned BAM file + pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@shyama-mama" + - "@jbv2" + - "@TCLamnidis" diff --git a/nextflow.config b/nextflow.config index 602331d5e..0ad84d596 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,13 +29,16 @@ params { multiqc_methods_description = null // Main references - fasta = null - fasta_fai = null - fasta_dict = null - fasta_mapperindexdir = null - fasta_circular_target = null - fasta_largeref = false - fasta_sheet = null + fasta = null + fasta_fai = null + fasta_dict = null + fasta_mapperindexdir = null + fasta_circular_target = null + fasta_largeref = false + fasta_sheet = null + fasta_circularmapper_elongationfactor = 500 + fasta_circularmapper_elongatedfasta = null + fasta_circularmapper_elongatedindex = null // Shard Fastq options run_fastq_sharding = false @@ -108,20 +111,21 @@ params { preprocessing_adapterremoval_qualitymax = 41 // Mapping - mapping_tool = 'bwaaln' - mapping_bwaaln_n = 0.01 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwaaln_k = 2 - mapping_bwaaln_l = 1024 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwaaln_o = 2 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwamem_k = 19 - mapping_bwamem_r = 1.5 - mapping_bowtie2_alignmode = 'local' - mapping_bowtie2_sensitivity = 'sensitive' - mapping_bowtie2_n = 0 - mapping_bowtie2_l = 20 - mapping_bowtie2_trim5 = 0 - mapping_bowtie2_trim3 = 0 - mapping_bowtie2_maxins = 500 + mapping_tool = 'bwaaln' + mapping_bwaaln_n = 0.01 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwaaln_k = 2 + mapping_bwaaln_l = 1024 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwaaln_o = 2 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwamem_k = 19 + mapping_bwamem_r = 1.5 + mapping_bowtie2_alignmode = 'local' + mapping_bowtie2_sensitivity = 'sensitive' + mapping_bowtie2_n = 0 + mapping_bowtie2_l = 20 + mapping_bowtie2_trim5 = 0 + mapping_bowtie2_trim3 = 0 + mapping_bowtie2_maxins = 500 + mapping_circularmapper_circularfilter = false // BAM Filtering run_bamfiltering = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 07f4473a3..dd252a9aa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,6 +84,12 @@ "help_text": "For most people this will likely be the same directory that contains the file you provided to `--fasta`.\n\nIf you want to use pre-existing `bwa index` indices, the directory should contain files ending in '.amb' '.ann' '.bwt'. If you want to use pre-existing `bowtie2 build` indices, the directory should contain files ending in'.1.bt2', '.2.bt2', '.rev.1.bt2'.\n\nIn any case do not include the files themselves in the path. nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa index`/`bowtie2 build` file suffixes. If not supplied, the indices will be generated for you.\n\n", "fa_icon": "fas fa-folder-open" }, + "fasta_largeref": { + "type": "boolean", + "description": "Specify to generate '.csi' BAM indices instead of '.bai' for larger reference genomes.", + "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). This parameter is not required for smaller references (including the human reference genomes hg19 or grch37/grch38).", + "fa_icon": "fas fa-address-book" + }, "save_reference": { "type": "boolean", "description": "Specify to save any pipeline-generated reference genome indices in the results directory.", @@ -126,9 +132,28 @@ }, "fasta_circular_target": { "type": "string", - "description": "Specify the FASTA header of the target chromosome to extend when using `circularmapper`.", + "description": "Specify the FASTA header of the extended chromosome when using `circularmapper`.", "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`", "fa_icon": "fas fa-bullseye" + }, + "fasta_circularmapper_elongationfactor": { + "type": "integer", + "default": 500, + "description": "Specify the number of bases to extend reference by (circularmapper only).", + "help_text": "The number of bases to extend the beginning and end of each reference genome with.", + "fa_icon": "fas fa-external-link-alt" + }, + "fasta_circularmapper_elongatedfasta": { + "type": "string", + "description": "Specify an elongated reference FASTA to be used for circularmapper.", + "help_text": "Specify an already elongated FASTA file for circularmapper to avoid regeneration.", + "fa_icon": "fas fa-address-book" + }, + "fasta_circularmapper_elongatedindex": { + "type": "string", + "description": "Specify a samtools index for the elongated FASTA file.", + "help_text": "Specify the index for an already elongated FASTA file to avoid regeneration.", + "fa_icon": "fas fa-address-book" } } }, @@ -506,12 +531,6 @@ "help_text": "Specify which mapping tool to use. Options are BWA aln ('`bwaaln`'), BWA mem ('`bwamem`'), circularmapper ('`circularmapper`'), or Bowtie 2 ('`bowtie2`'). BWA aln is the default and highly suited for short-read ancient DNA. BWA mem can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. CircularMapper enhances the mapping procedure to circular references, using the BWA algorithm but utilizing an extend-remap procedure (see [Peltzer et al 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). Bowtie 2 is similar to BWA aln, and has recently been suggested to provide slightly better results under certain conditions ([Poullet and Orlando 2020](https://doi.org/10.3389/fevo.2020.00105)), as well as providing extra functionality (such as FASTQ trimming).\n\nMore documentation can be seen for each tool under:\n\n- [BWA aln](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [BWA mem](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [CircularMapper](https://circularmapper.readthedocs.io/en/latest/contents/userguide.html)\n- [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line)", "fa_icon": "fas fa-hammer" }, - "fasta_largeref": { - "type": "boolean", - "description": "Specify to generate '.csi' BAM indices instead of '.bai' for larger reference genomes.", - "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). This parameter is not required for smaller references (including the human reference genomes hg19 or grch37/grch38).", - "fa_icon": "fas fa-address-book" - }, "mapping_bwaaln_n": { "type": "number", "default": 0.01, @@ -601,9 +620,15 @@ "mapping_bowtie2_maxins": { "type": "integer", "default": 500, - "description": "Specify the maximum fragment length for Bowtie 2 paired-end mapping mode only.", - "help_text": "Specify the maximum fragment length for valid paired-end alignments in Bowtie 2. This only applies to paired-end mapping (i.e. unmerged) and is therefore typically only useful for modern data.\n\n> Modifies Bowtie 2 parameter: `--maxins`", - "fa_icon": "fas fa-filter" + "description": "Specify the maximum fragment length for Bowtie2 paired-end mapping mode only.", + "help_text": "The maximum fragment for valid paired-end alignments. Only for paired-end mapping (i.e. unmerged), and therefore typically only useful for modern data.\n\n> Modifies Bowtie2 parameter: `--maxins`", + "fa_icon": "fas fa-exchange-alt" + }, + "mapping_circularmapper_circularfilter": { + "type": "boolean", + "fa_icon": "fas fa-filter", + "description": "Turn on to remove reads that did not map to the circularised genome.", + "help_text": "If you want to filter out reads that don't map to elongated/circularised chromosome (and also non-circular chromosome headers) from the resulting BAM file, turn this on.\n\n> Modifies `-f` and `-x` parameters of CircularMapper's RealignSAMFile" } }, "fa_icon": "fas fa-layer-group" @@ -1398,14 +1423,11 @@ { "$ref": "#/definitions/host_removal" }, - { - "$ref": "#/definitions/human_sex_determination" - }, { "$ref": "#/definitions/contamination_estimation" }, { - "$ref": "#/definitions/contamination_estimation" + "$ref": "#/definitions/human_sex_determination" } ] } diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf new file mode 100644 index 000000000..46874f987 --- /dev/null +++ b/subworkflows/local/circularmapper.nf @@ -0,0 +1,72 @@ +// +// Run circularmapper +// + +include { FASTQ_ALIGN_BWAALN as FASTQ_ALIGN_BWAALN_ELONGATED } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_REALIGNED } from '../../modules/nf-core/samtools/index/main' +include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' + +workflow CIRCULARMAPPER { + take: + ch_reference // channel (mandatory): [ val(meta), path(index), path(reference) ] + ch_elongated_reference // channel (mandatory): [ val(meta), path(elongated_index) ] + ch_elongated_chr_list // channel (mandatory): [ val(meta), path(elongated_chr_list) ] + ch_fastq_reads // channel (mandatory): [ val(meta), path(reads) ]. subworkImportant: meta REQUIRES single_end` entry! + val_elongation_factor // int (mandatory): Elongation factor used for chromosome circularisation + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_realigned_bams = Channel.empty() + ch_realigned_bais = Channel.empty() + ch_realigned_csis = Channel.empty() + + // Although mapping with BWA will need the elongated reference index, RealignSAMFile apparently does NOT need the elongated reference to be present, only the elongation factor. + FASTQ_ALIGN_BWAALN_ELONGATED( ch_fastq_reads, ch_elongated_reference ) + ch_versions = ch_versions.mix( FASTQ_ALIGN_BWAALN_ELONGATED.out.versions.first() ) + + ch_ref_for_realignsamfile = ch_reference + .join( ch_elongated_chr_list ) + .map { + meta, index, reference, elongated_chr_list -> + [ meta, reference, elongated_chr_list ] + } + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "id" , "reference" , false ) + } + + ch_input_for_realignsamfile = FASTQ_ALIGN_BWAALN_ELONGATED.out.bam + .map{ + // create meta consistent with rest of MAP workflow + meta, bam -> + new_meta = meta + [ reference: meta.id_index ] + [ new_meta, bam ] + } + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .combine( ch_ref_for_realignsamfile, by: 0 ) + .multiMap { + ignore_me, meta, bam, ref_meta, ref_fasta, elongated_chr_list -> + bam: [ meta, bam ] + fasta: [ ref_meta, ref_fasta ] + chr_list: [ ref_meta, elongated_chr_list ] + } + + CIRCULARMAPPER_REALIGNSAMFILE( ch_input_for_realignsamfile.bam, ch_input_for_realignsamfile.fasta, [ [], val_elongation_factor ], ch_input_for_realignsamfile.chr_list ) + ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) + ch_realigned_bams = CIRCULARMAPPER_REALIGNSAMFILE.out.bam + + SAMTOOLS_INDEX_REALIGNED( ch_realigned_bams ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX_REALIGNED.out.versions.first() ) + ch_realigned_bais = params.fasta_largeref ? SAMTOOLS_INDEX_REALIGNED.out.csi : SAMTOOLS_INDEX_REALIGNED.out.bai + + emit: + bam = ch_realigned_bams // [ meta, bam ] + bai = ch_realigned_bais // [ meta, bai/csi ] + versions = ch_versions + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf new file mode 100644 index 000000000..bf020b684 --- /dev/null +++ b/subworkflows/local/elongate_reference.nf @@ -0,0 +1,149 @@ +// +// Elongate a reference genome by circularising the target sequence by a given elongation factor. +// + +include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main' +include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' +include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' +include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' + +workflow ELONGATE_REFERENCE { + take: + ch_reference // [ meta, fasta, fai, dict, mapindex ] + ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index ] + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_circular_reference = Channel.empty() + ch_elongated_unzipped = Channel.empty() + ch_elongated_chr = Channel.empty() + + // Check if the provided elongated reference is gzipped, and if so, unzip it. + ch_elongated_branches = ch_elongated_reference + .branch { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> + for_gunzip: circularmapper_elongated_fasta != '' && circularmapper_elongated_fasta.extension == "gz" + skip_gunzip: true + } + + ch_elongated_for_gunzip = ch_elongated_branches.for_gunzip + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> + [ meta, circularmapper_elongated_fasta ] + } + + GUNZIP_ELONGATED_FASTA( ch_elongated_for_gunzip ) + ch_versions = ch_versions.mix( GUNZIP_ELONGATED_FASTA.out.versions.first() ) + + ch_elongated_unzipped = ch_elongated_reference + .join( GUNZIP_ELONGATED_FASTA.out.gunzip ) + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index, unzipped_fasta -> + def final_fasta = unzipped_fasta ?: circularmapper_elongated_fasta + [ meta, circular_target, unzipped_fasta, circularmapper_elongated_index ] + } + .mix( ch_elongated_branches.skip_gunzip ) + + /* + Check what fasta files we have. + There are four options: + 1. Elongated reference with index (ignore circular target) -> Pass through + 2. Elongated reference without index (ignore circular target) -> Index and emit + 3. No elongated reference, but circular target -> Elongate, index and emit. + 4. None of the above -> Throw error and stop execution during parameter validation + */ + + ch_circulargenerator_input = ch_elongated_unzipped + .branch{ + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> + ready: circularmapper_elongated_fasta != "" && circularmapper_elongated_index != "" + needs_index: circularmapper_elongated_fasta != "" && circularmapper_elongated_index == "" + needs_elongation: circularmapper_elongated_fasta == "" && circular_target != "" + } + + /* References that are already elongated, need ch_elongated_chr to be created from the circular target information + 1) Get the reference information ready for joinin with the new channel. + 2) Take all subchannels from the multiMap that do not go through CircularGenerator (.ready,.needs_index) and infer the name of the elongated_chr_list expected by RealignSAMFile + 3) Put the circular target in a file of that name FOR EACH REFERENCE. The resulting channel has no meta, so we need to add it. + 4) Add meta, and use to merge back to the reference channel. This way we can take the original reference's meta. + + This is a bit convoluted, but it should work. Would be simpler if I could create the meta within collectFile, but I did not find a way to do that. + */ + ch_ref_for_chr_list = ch_reference + .map { + addNewMetaFromAttributes( it, "id", "id", false ) + } + + ch_chr_list_for_already_elongated_ref = ch_circulargenerator_input.ready + .mix( ch_circulargenerator_input.needs_index ) + .join( ch_reference ) + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index, fasta, fai, dict, mapindex -> + [ meta, fasta, circular_target ] + } + .collectFile { + meta, fasta, circular_target -> + [ "${fasta.name}_500_elongated", circular_target + '\n' ] + } + .map { + file -> + def id = file.getSimpleName() + [ [id: id ], file ] + } + .join(ch_ref_for_chr_list) + .map { + ignore_me, chr_list, meta, fasta, fai, dict, mapindex -> + [ meta, chr_list ] + } + + // Elongate references that need it + // Join the original references to the branch of needs_elongation, to get the original fasta files, and elongate them. + ch_references_to_elongate = ch_circulargenerator_input.needs_elongation + .join( ch_reference ) + .multiMap { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index, fasta, fai, dict, mapindex -> + + def elongation_factor = params.fasta_circularmapper_elongationfactor + + fasta: [ meta, fasta ] + elongation_factor : [ meta, elongation_factor ] + target: [ meta, circular_target ] + } + + CIRCULARMAPPER_CIRCULARGENERATOR( + ch_references_to_elongate.fasta, + ch_references_to_elongate.elongation_factor, + ch_references_to_elongate.target + ) + ch_elongated_chr = ch_chr_list_for_already_elongated_ref.mix(CIRCULARMAPPER_CIRCULARGENERATOR.out.elongated) + ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) + + // Collect newly generated circular references and provided ones without an index, and index them. + ch_input_for_circular_indexing = ch_circulargenerator_input.needs_index + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> + [ meta, circularmapper_elongated_fasta ] + } + .mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta ) + + BWA_INDEX_CIRCULARISED(ch_input_for_circular_indexing) + ch_versions = ch_versions.mix( BWA_INDEX_CIRCULARISED.out.versions.first() ) + + ch_indexed_references = ch_input_for_circular_indexing + .join( BWA_INDEX_CIRCULARISED.out.index ) + + // Then put all the indexed elongated references together, replace any zipped ones with the unzipped version, and emit them + ch_circular_reference = ch_circulargenerator_input.ready + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> + [ meta, circularmapper_elongated_fasta, circularmapper_elongated_index ] + } + .mix( ch_indexed_references ) + + emit: + circular_reference = ch_circular_reference // [ meta, fasta, index ] + elongated_chr_list = ch_elongated_chr // [ meta, elongated_chr_list ] + versions = ch_versions + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 7aa267d37..10972a2a7 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -2,21 +2,24 @@ // Prepare reference indexing for downstream // -include { SEQKIT_SPLIT2 } from '../../modules/nf-core/seqkit/split2/main' -include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' -include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' -include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2 } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main' +include { SEQKIT_SPLIT2 } from '../../modules/nf-core/seqkit/split2/main' +include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2 } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MERGED_LANES } from '../../modules/nf-core/samtools/flagstat/main' +include { CIRCULARMAPPER } from '../../subworkflows/local/circularmapper' workflow MAP { take: - reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] - index // [ [meta], [ index ], [ fasta ] ] + reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] + index // [ [meta], [ index ], [ fasta ] ] + elogated_index // [ [meta], circularmapper_elongated_fasta, circularmapper_elongated_index ] + elongated_chr_list // [ [meta], elongated_chr_list ] main: ch_versions = Channel.empty() @@ -113,8 +116,21 @@ workflow MAP { SAMTOOLS_INDEX_BT2 ( ch_mapped_lane_bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_BT2.out.versions.first()) ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai + + } else if ( params.mapping_tool == 'circularmapper' ) { + ch_elongated_reference_for_mapping = elogated_index + .map { + meta, elongated_fasta, elongated_index -> + [ meta, elongated_index ] + } + + CIRCULARMAPPER( index, ch_elongated_reference_for_mapping, elongated_chr_list, reads, params.fasta_circularmapper_elongationfactor ) + ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) + ch_mapped_lane_bam = CIRCULARMAPPER.out.bam + ch_mapped_lane_bai = CIRCULARMAPPER.out.bai // [ [ meta ], bai/csi ] } + // Only run merge lanes if we have more than one BAM to merge! ch_input_for_lane_merge = ch_mapped_lane_bam .map { @@ -144,16 +160,16 @@ workflow MAP { ch_mapped_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MERGED_LANES.out.csi : SAMTOOLS_INDEX_MERGED_LANES.out.bai ch_versions.mix( SAMTOOLS_INDEX_MERGED_LANES.out.versions ) - ch_input_for_flagstat = SAMTOOLS_SORT_MERGED_LANES.out.bam.join( SAMTOOLS_INDEX_MERGED_LANES.out.bai, failOnMismatch: true ) + ch_input_for_flagstat = ch_mapped_bam.join( ch_mapped_bai, failOnMismatch: true ) - SAMTOOLS_FLAGSTAT_MAPPED ( ch_input_for_flagstat ) - ch_versions.mix( SAMTOOLS_FLAGSTAT_MAPPED.out.versions.first() ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT_MAPPED.out.flagstat ) + SAMTOOLS_FLAGSTAT_MERGED_LANES ( ch_input_for_flagstat ) + ch_versions.mix( SAMTOOLS_FLAGSTAT_MERGED_LANES .out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT_MERGED_LANES .out.flagstat ) emit: bam = ch_mapped_bam // [ [ meta ], bam ] - bai = ch_mapped_bai // [ [ meta ], bai ] - flagstat = SAMTOOLS_FLAGSTAT_MAPPED.out.flagstat // [ [ meta ], stats ] + bai = ch_mapped_bai // [ [ meta ], bai/csi ] + flagstat = SAMTOOLS_FLAGSTAT_MERGED_LANES .out.flagstat // [ [ meta ], stats ] mqc = ch_multiqc_files versions = ch_versions diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index cdf8d0e90..0080c75cf 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -2,11 +2,12 @@ // Prepare reference indexing for downstream // -include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf' -include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf' -include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf' -include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' -include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' +include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf' +include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf' +include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' +include { ELONGATE_REFERENCE } from '../../subworkflows/local/elongate_reference.nf' workflow REFERENCE_INDEXING { take: @@ -20,12 +21,13 @@ workflow REFERENCE_INDEXING { // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( fasta_fai || fasta_dict || fasta_mapperindexdir )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as e.g. `--fasta_fai`. --fasta_sheet CSV/TSV takes priority and --fasta_* parameters will be ignored.") - if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile || params.sexdeterrmine_bedfile || params.mapstats_bedtools_featurefile || params.genotyping_reference_ploidy || params.genotyping_gatk_dbsnp )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.") + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile || params.sexdeterrmine_bedfile || params.mapstats_bedtools_featurefile || params.genotyping_reference_ploidy || params.genotyping_gatk_dbsnp || params.fasta_circular_target || params.circularmapper_elongated_fasta || params.circularmapper_elongated_fai )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.") if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { // If input (multi-)reference sheet supplied REFERENCE_INDEXING_MULTI ( fasta ) ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference + ch_reference_to_elongate = REFERENCE_INDEXING_MULTI.out.elongated_reference ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_MULTI.out.pmd_masked_fasta @@ -39,6 +41,7 @@ workflow REFERENCE_INDEXING { } else { // If input FASTA and/or indicies supplied REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) + ch_reference_to_elongate = REFERENCE_INDEXING_SINGLE.out.elongated_reference ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_SINGLE.out.pmd_masked_fasta @@ -125,17 +128,41 @@ workflow REFERENCE_INDEXING { ch_dbsnp = ch_dbsnp .filter { it[1] != "" } + // Elongate reference for circularmapper if requested + if ( params.mapping_tool == "circularmapper" ) { + // Throw errors if required parameters are missing + // A circular target is required even when an elongated reference has been provided. + ch_elongated_for_gunzip = ch_reference_to_elongate + .filter{ + meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex -> + circular_target != "" + } + .ifEmpty{ error "[nf-core/eager] ERROR: Mapping with circularmapper requires either a circular target for at least one reference." } + + // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. + ELONGATE_REFERENCE( ch_reference_for_mapping, ch_reference_to_elongate ) + ch_version = ch_versions.mix( ELONGATE_REFERENCE.out.versions ) + ch_elongated_indexed_reference = ELONGATE_REFERENCE.out.circular_reference + ch_elongated_chr_list = ELONGATE_REFERENCE.out.elongated_chr_list + + } else { + ch_elongated_indexed_reference = ch_reference_to_elongate + ch_elongated_chr_list = Channel.empty() + } + emit: - reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] - mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] - hapmap = ch_hapmap // [ meta, hapmap ] - pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] - pmd_bed_for_masking = ch_pmd_bed_for_masking // [ meta, pmd_bed_for_masking ] - snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] - pileupcaller_bed_snp = ch_pileupcaller_bed_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] - sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] - bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] - dbsnp = ch_dbsnp // [ meta, dbsnp ] + reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] + elongated_reference = ch_elongated_indexed_reference // [ meta, circularmapper_elongated_fasta, circularmapper_elongated_index ] + elongated_chr_list = ch_elongated_chr_list // [ meta, elongated_chr_list ] + mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] + hapmap = ch_hapmap // [ meta, hapmap ] + pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] + pmd_bed_for_masking = ch_pmd_bed_for_masking // [ meta, pmd_bed_for_masking ] + snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] + pileupcaller_bed_snp = ch_pileupcaller_bed_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] + sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] + dbsnp = ch_dbsnp // [ meta, dbsnp ] versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 6a42d9208..00276a5b9 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -7,7 +7,6 @@ include { BWA_INDEX } from '../../modules/nf-core/bwa/inde include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -// TODO missing: circulargeneraotr? workflow REFERENCE_INDEXING_MULTI { @@ -20,23 +19,25 @@ workflow REFERENCE_INDEXING_MULTI { // Import reference sheet and change empty arrays to empty strings for compatibility with single reference input ch_splitreferencesheet_for_branch = Channel.fromSamplesheet("fasta_sheet") .map{ - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> - meta.ploidy = meta.genotyping_ploidy != null ? meta.genotyping_ploidy : params.genotyping_reference_ploidy - fai = fai != [] ? fai : "" - dict = dict != [] ? dict : "" - mapper_index = mapper_index != [] ? mapper_index : "" - circular_target = circular_target != [] ? circular_target : "" - mitochondrion = mitochondrion != [] ? mitochondrion : "" - capture_bed = capture_bed != [] ? capture_bed : "" - pileupcaller_bed = pileupcaller_bed != [] ? pileupcaller_bed : "" - pileupcaller_snp = pileupcaller_snp != [] ? pileupcaller_snp : "" - hapmap = hapmap != [] ? hapmap : "" - pmd_masked_fasta = pmd_masked_fasta != [] ? pmd_masked_fasta : "" - pmd_bed_for_masking = pmd_bed_for_masking != [] ? pmd_bed_for_masking : "" - sexdet_bed = sexdet_bed != [] ? sexdet_bed : "" - bedtools_feature = bedtools_feature != [] ? bedtools_feature : "" - genotyping_gatk_dbsnp = genotyping_gatk_dbsnp != [] ? genotyping_gatk_dbsnp : "" - [ meta - meta.subMap('genotyping_ploidy'), fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] + meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> + meta.ploidy = meta.genotyping_ploidy != null ? meta.genotyping_ploidy : params.genotyping_reference_ploidy + fai = fai != [] ? fai : "" + dict = dict != [] ? dict : "" + mapper_index = mapper_index != [] ? mapper_index : "" + circular_target = circular_target != [] ? circular_target : "" + circularmapper_elongatedfasta = circularmapper_elongatedfasta != [] ? circularmapper_elongatedfasta : "" + circularmapper_elongatedindex = circularmapper_elongatedindex != [] ? circularmapper_elongatedindex : "" + mitochondrion = mitochondrion != [] ? mitochondrion : "" + capture_bed = capture_bed != [] ? capture_bed : "" + pileupcaller_bed = pileupcaller_bed != [] ? pileupcaller_bed : "" + pileupcaller_snp = pileupcaller_snp != [] ? pileupcaller_snp : "" + hapmap = hapmap != [] ? hapmap : "" + pmd_masked_fasta = pmd_masked_fasta != [] ? pmd_masked_fasta : "" + pmd_bed_for_masking = pmd_bed_for_masking != [] ? pmd_bed_for_masking : "" + sexdet_bed = sexdet_bed != [] ? sexdet_bed : "" + bedtools_feature = bedtools_feature != [] ? bedtools_feature : "" + genotyping_gatk_dbsnp = genotyping_gatk_dbsnp != [] ? genotyping_gatk_dbsnp : "" + [ meta - meta.subMap('genotyping_ploidy'), fasta, fai, dict, mapper_index, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] } // GENERAL DESCRIPTION FOR NEXT SECTIONS @@ -52,8 +53,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_input_from_referencesheet = ch_splitreferencesheet_for_branch .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> - generated: [ meta, fasta, fai, dict, mapper_index, circular_target ] + meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> + generated: [ meta, fasta, fai, dict, mapper_index ] + circularmapper: [ meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex ] mitochondrion_header: [ meta, mitochondrion ] angsd_hapmap: [ meta, hapmap ] pmd_masked_fasta: [ meta, pmd_masked_fasta ] @@ -68,7 +70,7 @@ workflow REFERENCE_INDEXING_MULTI { // Detect if fasta is gzipped or not ch_fasta_for_gunzip = ch_input_from_referencesheet.generated .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> forgunzip: fasta.extension == "gz" skip: true } @@ -76,9 +78,9 @@ workflow REFERENCE_INDEXING_MULTI { // Pull out name/file to match cardinality for GUNZIP module ch_gunzip_input = ch_fasta_for_gunzip.forgunzip .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> gunzip: [ meta, fasta ] - remainder: [ meta, fai, dict, mapper_index, circular_target ] + remainder: [ meta, fai, dict, mapper_index ] } @@ -96,7 +98,7 @@ workflow REFERENCE_INDEXING_MULTI { // Separate out non-faidxed references ch_fasta_for_faidx = ch_fasta_for_faiindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> forfaidx: fai == "" skip: true } @@ -105,9 +107,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidx_input = ch_fasta_for_faidx .forfaidx .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> faidx: [ meta, fasta ] - remainder: [ meta, fasta, dict, mapper_index, circular_target ] // we drop fai here as we are going to make it + remainder: [ meta, fasta, dict, mapper_index ] // we drop fai here as we are going to make it } SAMTOOLS_FAIDX ( ch_faidx_input.faidx, [ [], [] ] ) @@ -117,9 +119,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidxed_formix = SAMTOOLS_FAIDX.out.fai .join( ch_faidx_input.remainder, failOnMismatch: true ) .map { - meta, fai, fasta, dict, mapper_index, circular_target -> + meta, fai, fasta, dict, mapper_index -> - [ meta, fasta, fai, dict, mapper_index, circular_target ] + [ meta, fasta, fai, dict, mapper_index ] } // Mix back newly faidx'd references with the pre-indexed ones @@ -131,7 +133,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_dict = ch_fasta_for_dictindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> fordict: dict == "" skip: true } @@ -139,9 +141,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_dict_input = ch_fasta_for_dict .fordict .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> dict: [ meta, fasta ] - remainder: [ meta, fasta, fai, mapper_index, circular_target ] + remainder: [ meta, fasta, fai, mapper_index ] } PICARD_CREATESEQUENCEDICTIONARY ( ch_dict_input.dict ) @@ -150,9 +152,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_dicted_formix = PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict .join( ch_dict_input.remainder, failOnMismatch: true ) .map { - meta, dict, fasta, fai, mapper_index, circular_target -> + meta, dict, fasta, fai, mapper_index -> - [ meta, fasta, fai, dict, mapper_index, circular_target ] + [ meta, fasta, fai, dict, mapper_index ] } ch_dict_formapperindexing = ch_fasta_for_dict.skip.mix(ch_dicted_formix) @@ -165,7 +167,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_mapperindex = ch_dict_formapperindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> forindex: mapper_index == "" skip: true } @@ -173,12 +175,12 @@ workflow REFERENCE_INDEXING_MULTI { ch_mapindex_input = ch_fasta_for_mapperindex .forindex .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> index: [ meta, fasta ] - remainder: [ meta, fasta, fai, dict, circular_target ] + remainder: [ meta, fasta, fai, dict ] } - if ( params.mapping_tool == "bwaaln" || params.mapping_tool == "bwamem" ) { + if ( params.mapping_tool == "bwaaln" || params.mapping_tool == "bwamem" || params.mapping_tool == "circularmapper" ) { BWA_INDEX ( ch_mapindex_input.index ) ch_version = ch_versions.mix( BWA_INDEX.out.versions ) ch_indexed_forremap = BWA_INDEX.out.index @@ -186,22 +188,21 @@ workflow REFERENCE_INDEXING_MULTI { BOWTIE2_BUILD ( ch_mapindex_input.index ) ch_version = ch_versions.mix( BOWTIE2_BUILD.out.versions ) ch_indexed_forremap = BOWTIE2_BUILD.out.index - } else if ( params.mapping_tool == "circularmapper" ) { - println("CircularMapper Indexing Not Yet Implemented") } ch_indexed_formix = ch_indexed_forremap .join( ch_mapindex_input.remainder, failOnMismatch: true ) .map { - meta, mapper_index, fasta, fai, dict, circular_target -> + meta, mapper_index, fasta, fai, dict -> - [ meta, fasta, fai, dict, mapper_index, circular_target ] + [ meta, fasta, fai, dict, mapper_index ] } ch_indexmapper_for_reference = ch_fasta_for_mapperindex.skip.mix(ch_indexed_formix) emit: - reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex ] + elongated_reference = ch_input_from_referencesheet.circularmapper // [ meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex ] mitochondrion_header = ch_input_from_referencesheet.mitochondrion_header // [ meta, mitochondrion ] hapmap = ch_input_from_referencesheet.angsd_hapmap // [ meta, hapmap ] pmd_masked_fasta = ch_input_from_referencesheet.pmd_masked_fasta // [ meta, pmd_masked_fasta ] diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index a21b4727d..41feced1e 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -8,7 +8,6 @@ include { BWA_INDEX } from '../../modules/nf-core/bwa/inde include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -// TODO missing: circulargeneraotr? workflow REFERENCE_INDEXING_SINGLE { @@ -52,7 +51,7 @@ workflow REFERENCE_INDEXING_SINGLE { } // Generate mapper indicies if not supplied, and if supplied generate meta - if ( params.mapping_tool == 'bwaaln' || params.mapping_tool == 'bwamem' ){ + if ( params.mapping_tool == 'bwaaln' || params.mapping_tool == 'bwamem' || params.mapping_tool == 'circularmapper' ){ if ( !fasta_mapperindexdir ) { ch_fasta_mapperindexdir = BWA_INDEX ( ch_ungz_ref ).index @@ -90,13 +89,16 @@ workflow REFERENCE_INDEXING_SINGLE { def bedtools_feature = params.mapstats_bedtools_featurefile != null ? file(params.mapstats_bedtools_featurefile, checkIfExists: true ) : "" def genotyping_reference_ploidy = params.genotyping_reference_ploidy def genotyping_gatk_dbsnp = params.genotyping_gatk_dbsnp != null ? file(params.genotyping_gatk_dbsnp, checkIfExists: true ) : "" - [ meta + [ ploidy: genotyping_reference_ploidy ], fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] + def circularmapper_elongated_fasta = params.fasta_circularmapper_elongatedfasta != null ? file( params.fasta_circularmapper_elongatedfasta, checkIfExists: true ) : "" + def circularmapper_elongated_index = params.fasta_circularmapper_elongatedindex != null ? file( params.fasta_circularmapper_elongatedindex, checkIfExists: true ) : "" + [ meta + [ ploidy: genotyping_reference_ploidy ], fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_index ] } ch_ref_index_single = ch_reference_for_mapping .multiMap{ - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> - reference: [ meta, fasta, fai, dict, mapper_index, circular_target ] + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_index -> + reference: [ meta, fasta, fai, dict, mapper_index ] + circularmapper: [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index ] mito_header: [ meta, mitochondrion_header ] hapmap: [ meta, contamination_estimation_angsd_hapmap ] pmd_masked_fasta: [ meta, pmd_masked_fasta ] @@ -109,7 +111,8 @@ workflow REFERENCE_INDEXING_SINGLE { } emit: - reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex ] + elongated_reference = ch_ref_index_single.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index ] mitochondrion_header = ch_ref_index_single.mito_header // [ meta, mito_header ] hapmap = ch_ref_index_single.hapmap // [ meta, hapmap ] pmd_masked_fasta = ch_ref_index_single.pmd_masked_fasta // [ meta, pmd_masked_fasta ] diff --git a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf index 8809f7597..2d076cf17 100644 --- a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf @@ -231,7 +231,7 @@ def validateInputParameters() { if ( params.genotyping_source == 'pmd' && ! params.run_pmd_filtering ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'pmd' unless PMD-filtering is ran.") } if ( params.genotyping_source == 'rescaled' && ! params.run_mapdamage_rescaling ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'rescaled' unless aDNA damage rescaling is ran.") } if ( params.fasta && params.run_genotyping && params.genotyping_tool == 'pileupcaller' && ! (params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile ) ) { exit 1, ("[nf-core/eager] ERROR: Genotyping with pileupcaller requires both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' to be provided.") } - + if ( params.fasta && params.mapping_tool == "circularmapper" && !params.fasta_circular_target ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires --fasta_circular_target to be provided.") } } // diff --git a/workflows/eager.nf b/workflows/eager.nf index fee976267..2804db5fd 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -181,11 +181,11 @@ workflow EAGER { // ch_reference_for_mapping = REFERENCE_INDEXING.out.reference .map{ - meta, fasta, fai, dict, index, circular_target -> + meta, fasta, fai, dict, index -> [ meta, index, fasta ] } - MAP ( ch_reads_for_mapping, ch_reference_for_mapping ) + MAP ( ch_reads_for_mapping, ch_reference_for_mapping, REFERENCE_INDEXING.out.elongated_reference, REFERENCE_INDEXING.out.elongated_chr_list ) ch_versions = ch_versions.mix( MAP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( MAP.out.mqc.collect{it[1]}.ifEmpty([]) ) @@ -247,7 +247,7 @@ workflow EAGER { ch_fasta_for_deduplication = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target -> + meta, fasta, fai, dict, index -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -500,7 +500,7 @@ workflow EAGER { ch_fasta_for_damagecalculation = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target -> + meta, fasta, fai, dict, index -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -567,7 +567,7 @@ workflow EAGER { ch_reference_for_genotyping = REFERENCE_INDEXING.out.reference // Remove unnecessary files from the reference channel, so SWF doesn't break with each change to reference channel. .map { - meta, fasta, fai, dict, mapindex, circular_target -> + meta, fasta, fai, dict, mapindex -> [ meta, fasta, fai, dict ] } GENOTYPE(