From 010ffcd02b5f2e8542e24257dd5db671c1711bf4 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 14 Jun 2024 11:49:23 +0200 Subject: [PATCH 01/61] Starting subworkflow for circularmapper --- modules.json | 10 +++ .../circulargenerator/environment.yml | 9 +++ .../circularmapper/circulargenerator/main.nf | 50 ++++++++++++ .../circularmapper/circulargenerator/meta.yml | 55 +++++++++++++ .../realignsamfile/environment.yml | 7 ++ .../circularmapper/realignsamfile/main.nf | 57 +++++++++++++ .../circularmapper/realignsamfile/meta.yml | 58 +++++++++++++ subworkflows/local/circularmapper.nf | 81 +++++++++++++++++++ 8 files changed, 327 insertions(+) create mode 100644 modules/nf-core/circularmapper/circulargenerator/environment.yml create mode 100644 modules/nf-core/circularmapper/circulargenerator/main.nf create mode 100644 modules/nf-core/circularmapper/circulargenerator/meta.yml create mode 100644 modules/nf-core/circularmapper/realignsamfile/environment.yml create mode 100644 modules/nf-core/circularmapper/realignsamfile/main.nf create mode 100644 modules/nf-core/circularmapper/realignsamfile/meta.yml create mode 100644 subworkflows/local/circularmapper.nf diff --git a/modules.json b/modules.json index 7b4d35fce..fc368c50a 100644 --- a/modules.json +++ b/modules.json @@ -95,6 +95,16 @@ "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", "installed_by": ["modules"] }, + "circularmapper/circulargenerator": { + "branch": "master", + "git_sha": "5890d9e73aaa803fc6be94b1822539b4204d8cff", + "installed_by": ["modules"] + }, + "circularmapper/realignsamfile": { + "branch": "master", + "git_sha": "5890d9e73aaa803fc6be94b1822539b4204d8cff", + "installed_by": ["modules"] + }, "damageprofiler": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/circularmapper/circulargenerator/environment.yml b/modules/nf-core/circularmapper/circulargenerator/environment.yml new file mode 100644 index 000000000..f1e1201ef --- 
/dev/null +++ b/modules/nf-core/circularmapper/circulargenerator/environment.yml @@ -0,0 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "circularmapper_circulargenerator" + +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::circularmapper=1.93.5 diff --git a/modules/nf-core/circularmapper/circulargenerator/main.nf b/modules/nf-core/circularmapper/circulargenerator/main.nf new file mode 100644 index 000000000..b1664d032 --- /dev/null +++ b/modules/nf-core/circularmapper/circulargenerator/main.nf @@ -0,0 +1,50 @@ +// This module does the following: +//creating a modified reference genome, with an elongation of the an specified amount of bases +process CIRCULARMAPPER_CIRCULARGENERATOR { + + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/circularmapper:1.93.5--h2a3209d_3': + 'biocontainers/circularmapper:1.93.5--h2a3209d_3' }" + + input: + tuple val(meta), path(reference) + val(elong) + + output: + tuple val(meta), path("*_${elong}.fasta"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + circulargenerator -e ${elong} \ + -i ${reference} \ + -s ${prefix} \ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + circulargenerator: \$(circulargenerator -h | grep 'usage' | sed 's/usage: CircularGenerator//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_${elong}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + circulargenerator: \$(circulargenerator -h | grep 'usage' | sed 's/usage: CircularGenerator//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/circularmapper/circulargenerator/meta.yml b/modules/nf-core/circularmapper/circulargenerator/meta.yml new file mode 100644 index 000000000..3e6a51ada --- /dev/null +++ b/modules/nf-core/circularmapper/circulargenerator/meta.yml @@ -0,0 +1,55 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "circularmapper_circulargenerator" +description: A method to improve mappings on circular genomes, using the BWA mapper. 
+keywords: + - sort + - example + - genomics +tools: + - "circulargenerator": + description: "Creating a modified reference genome, with an elongation of the an specified amount of bases" + homepage: "https://github.com/apeltzer/CircularMapper" + documentation: "https://github.com/apeltzer/CircularMapper/blob/master/docs/contents/userguide.rst" + tool_dev_url: "https://github.com/apeltzer/CircularMapper" + doi: "no DOI available" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - reference: + type: file + description: Genome fasta file + pattern: "*.fasta" + + - elong: + type: integer + description: The number of bases that the ends of the target chromosome in the reference genome should be elongated by + +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + + - fasta: + type: file + description: Genome fasta file + pattern: "*.fasta" + +authors: + - "@apalleja" +maintainers: + - "" diff --git a/modules/nf-core/circularmapper/realignsamfile/environment.yml b/modules/nf-core/circularmapper/realignsamfile/environment.yml new file mode 100644 index 000000000..d9beb5ae1 --- /dev/null +++ b/modules/nf-core/circularmapper/realignsamfile/environment.yml @@ -0,0 +1,7 @@ +name: circularmapper_realignsamfile +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::circularmapper=1.93.5 diff --git a/modules/nf-core/circularmapper/realignsamfile/main.nf b/modules/nf-core/circularmapper/realignsamfile/main.nf new file mode 100644 index 000000000..579815df0 --- /dev/null +++ b/modules/nf-core/circularmapper/realignsamfile/main.nf @@ -0,0 +1,57 @@ +process CIRCULARMAPPER_REALIGNSAMFILE { + tag "$meta.id" + label 'process_medium' + + 
conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/circularmapper:1.93.5--h4a94de4_1': + 'biocontainers/circularmapper:1.93.5--h4a94de4_1' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(fasta) + val(elongation_factor) + + output: + tuple val(meta), path("*_realigned.bam") , emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.93.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + realignsamfile \\ + -Xmx${task.memory.toGiga()}g \\ + ${args} \\ + -e ${elongation_factor} \\ + -i ${bam} \\ + -r ${fasta} + + ## realignsamfile has a hardcoded output name. Rename if necessary to use prefix. + if [[ "${bam.getBaseName()}_realigned.bam" != "${prefix}_realigned.bam" ]]; then + mv ${bam.getBaseName()}_realigned.bam ${prefix}_realigned.bam + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CircularMapper: ${VERSION} + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.93.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch ${prefix}_realigned.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CircularMapper: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/circularmapper/realignsamfile/meta.yml b/modules/nf-core/circularmapper/realignsamfile/meta.yml new file mode 100644 index 000000000..bc4173754 --- /dev/null +++ b/modules/nf-core/circularmapper/realignsamfile/meta.yml @@ -0,0 +1,58 @@ +name: "circularmapper_realignsamfile" +description: Realign reads mapped with BWA to elongated reference genome +keywords: + - realign + - circular + - map + - reference + - fasta + - bam + - short-read + - bwa +tools: + - "circularmapper": + description: "A method to improve mappings on circular genomes such as Mitochondria." + homepage: "https://circularmapper.readthedocs.io/en/latest/index.html" + documentation: "https://circularmapper.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/apeltzer/CircularMapper/" + doi: "10.1186/s13059-016-0918-z" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: BAM/SAM file + pattern: "*.{bam,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - fasta: + type: file + description: Input elongated genome fasta + - elongation_factor: + type: integer + description: The elongation factor used when running circulargenerator, i.e. the number of bases that the ends of the target chromosome in the reference genome was elongated by +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - bam: + type: file + description: Realigned BAM file + pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@shyama-mama" + - "@jbv2" + - "@TCLamnidis" diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf new file mode 100644 index 000000000..9d6abe311 --- /dev/null +++ b/subworkflows/local/circularmapper.nf @@ -0,0 +1,81 @@ +// +// Run circularmapper +// + +include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' +include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' +include { BWA_ALN as BWA_ALN_CIRCULARMAPPER } from '../../modules/nf-core/bwa/aln/main' +include { BWA_INDEX as BWA_INDEX_CIRCULARMAPPER } from '../../modules/nf-core/bwa/index/main' +include { BWA_SAMSE as BWA_SAMSE_CIRCULARMAPPER } from '../../../modules/nf-core/bwa/samse/main' + +workflow CIRCULARMAPPER { + + take: + fasta_reference // channel (mandatory): [ val(meta), path(reference) ] + eval // channel (mandatory): val(elongation value) + fastq_reads // channel (mandatory): [ val(meta), path(reads) ] + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( params.run_circularmapper ) { + + ch_reference = fasta_reference + ch_eval = eval + + CIRCULARMAPPER_CIRCULARGENERATOR(ch_reference, ch_eval) + ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) + + BWA_INDEX_CIRCULARMAPPER(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta) + ch_versions = ch_versions.mix( BWA_INDEX_CIRCULARMAPPER.out.versions.first() ) + + ch_input_bwa_aln = fastq_reads + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .groupTuple(by:0) + .combine( 
BWA_INDEX_CIRCULARMAPPER.out.index, by: 0 ) // [ [meta], fastq, bai ] + .multiMap { + combo_meta, metas, fastq, ref_bai, bai -> + def ids = metas.collect { meta -> meta.id } + fastqs: [ combo_meta + [id: ids], fastq ] + bai: [ ref_bai, bai ] + } + + BWA_ALN_CIRCULARMAPPER(ch_input_bwa_aln) + ch_versions = ch_versions.mix( BWA_ALN_CIRCULARMAPPER.out.versions.first() ) + + ch_input_bwa_samse = ch_input_bwa_aln + .combine( BWA_ALN_CIRCULARMAPPER.out.sai, by: 0 ) // [ [meta], fastq, bai, sai ] + .multiMap { + metas, fastq, ref_bai, bai, ref_sai, sai -> + fastqs: [ metas, fastq, sai ] + bai: [ ref_bai, bai ] + } + + BWA_SAMSE_CIRCULARMAPPER(ch_input_bwa_samse) + ch_versions = ch_versions.mix( BWA_SAMSE_CIRCULARMAPPER.out.versions.first() ) + + ch_input_realignsamfile = BWA_SAMSE_CIRCULARMAPPER.out.bam + .combine(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta, by: 0) + .combine(ch_eval) + .multiMap { + ref_bam, bam, ref_fasta, fasta, ch_eval -> + bam: [ ref_bam, bam ] + fasta: [ ref_fasta, fasta ] + eval: [ ch_eval ] + } + + CIRCULARMAPPER_REALIGNSAMFILE(ch_input_realignsamfile) + ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) + + emit: + + bam = CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] + versions = ch_versions // channel: [ path(versions.yml) ] + + } +} From 3bbb28c65ec164b3c4f8a640576970a1c4f5529f Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 21 Jun 2024 11:58:59 +0200 Subject: [PATCH 02/61] adding parameters and outputs. 
Missing to add subworkflow to the main workflow --- CITATIONS.md | 4 ++++ conf/modules.config | 20 ++++++++++++++++++++ docs/output.md | 11 +++++++++++ nextflow.config | 4 ++++ nextflow_schema.json | 19 ++++++++++++++----- subworkflows/local/circularmapper.nf | 2 +- 6 files changed, 54 insertions(+), 6 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 3715b56a8..8c742dc34 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -126,6 +126,10 @@ > Sex.DetERRmine.py Lamnidis, T.C. et al., 2018. Ancient Fennoscandian genomes reveal origin and spread of Siberian ancestry in Europe. Nature communications, 9(1), p.5018. Available at: http://dx.doi.org/10.1038/s41467-018-07483-5. Download: https://github.com/TCLamnidis/Sex.DetERRmine + - [CircularMapper](https://doi.org/10.1186/s13059-016-0918-z) + + > Peltzer, A., Jäger, G., Herbig, A., Seitz, A., Kniep, C., Krause, J., & Nieselt, K. (2016). EAGER: efficient ancient genome reconstruction. Genome Biology, 17(1), 1–14. doi: [10.1186/s13059-016-0918-z](https://doi.org/10.1186/s13059-016-0918-z) + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/conf/modules.config b/conf/modules.config index b03153561..c4f01939b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -529,6 +529,26 @@ process { ] } + withName: CIRCULARMAPPER_CIRCULARGENERATOR { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + publishDir = [ + path: { "${params.outdir}/mapping/circularmapper/" }, + mode: params.publish_dir_mode, + pattern: '*[0-9].fasta' + ] + } + + withName: CIRCULARMAPPER_REALIGNSAMFILE { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + publishDir = [ + path: { "${params.outdir}/mapping/circularmapper/" }, + mode: params.publish_dir_mode, + pattern: '*_realigned.bam' + ] + } + // // DEDUPLICATION 
// diff --git a/docs/output.md b/docs/output.md index 9237f0c55..b29959f56 100644 --- a/docs/output.md +++ b/docs/output.md @@ -634,3 +634,14 @@ When using pileupCaller for genotyping, single-stranded and double-stranded libr [ANGSD](http://www.popgen.dk/angsd/index.php/ANGSD) is a software for analyzing next generation sequencing data. It can estimate genotype likelihoods and allele frequencies from next-generation sequencing data. The output provided is a bgzipped genotype likelihood file, containing likelihoods across all samples per reference. Users can specify the model used for genotype likelihood estimation, as well as the output format. For more information on the available options, see the [ANGSD](https://www.popgen.dk/angsd/index.php/Genotype_Likelihoods). + +#### CircularMapper + +
+Output files + +- `mapping/circularmapper` + + - `*realigned.bam`: BAM file realigned to the extended reference + +
diff --git a/nextflow.config b/nextflow.config index 602331d5e..d7b93a3aa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -179,6 +179,10 @@ params { mapstats_preseq_cval = 0.95 mapstats_preseq_defects_mode = false + //Circular Mapper + run_circularmapper = true + elongation_factor = 500 + // Damage Calculation options skip_damagecalculation = false damagecalculation_tool = 'damageprofiler' diff --git a/nextflow_schema.json b/nextflow_schema.json index 9b1102705..237c676ee 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1162,6 +1162,18 @@ "snpcapture_bed": { "type": "string", "description": "Path to snp capture in BED format. Provided file can also be gzipped." + }, + "run_circularmapper": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-check-circle" + }, + "elongation_factor": { + "type": "integer", + "default": 500, + "description": "Specify the number of bases to extend reference by (circularmapper only)", + "help_text": "The number of bases to extend the reference genome with. 
By default this is set to 500 if not specified otherwise.", + "fa_icon": "fas fa-external-link-alt" } }, "fa_icon": "fas fa-search" @@ -1312,7 +1324,7 @@ }, "contamination_estimation_angsd_hapmap": { "type": "string", - "default": "${projectDir}/assets/angsd_resources/HapMapChrX.gz", + "default": "/Users/judith_ballesteros/Documents/GitHub/eager/assets/angsd_resources/HapMapChrX.gz", "description": "Path to HapMap file of chromosome for contamination estimation..", "help_text": "The haplotype map, or \"HapMap\", records the location of haplotype blocks and their tag SNPs.", "fa_icon": "fas fa-map" @@ -1394,14 +1406,11 @@ { "$ref": "#/definitions/host_removal" }, - { - "$ref": "#/definitions/human_sex_determination" - }, { "$ref": "#/definitions/contamination_estimation" }, { - "$ref": "#/definitions/contamination_estimation" + "$ref": "#/definitions/human_sex_determination" } ] } diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index 9d6abe311..1d98dc4e5 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -74,7 +74,7 @@ workflow CIRCULARMAPPER { emit: - bam = CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] + bam = CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] versions = ch_versions // channel: [ path(versions.yml) ] } From 25ec1dadbf6283bb1752e383691b8c0feedea769 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 28 Jun 2024 11:42:31 +0200 Subject: [PATCH 03/61] adding circularmapper to map.nf --- nextflow.config | 1 - nextflow_schema.json | 19 +++++++------------ subworkflows/local/circularmapper.nf | 4 +--- subworkflows/local/map.nf | 21 +++++++++++++++++++++ 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/nextflow.config b/nextflow.config index d7b93a3aa..63663b004 100644 --- a/nextflow.config +++ b/nextflow.config @@ -180,7 +180,6 @@ params { mapstats_preseq_defects_mode = false //Circular Mapper - 
run_circularmapper = true elongation_factor = 500 // Damage Calculation options diff --git a/nextflow_schema.json b/nextflow_schema.json index 237c676ee..8678793dc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -603,6 +603,13 @@ "description": "Specify the maximum fragment length for Bowtie2 paired-end mapping mode only.", "help_text": "The maximum fragment for valid paired-end alignments. Only for paired-end mapping (i.e. unmerged), and therefore typically only useful for modern data.\n\n> Modifies Bowtie2 parameter: `--maxins`", "fa_icon": "fas fa-exchange-alt" + }, + "elongation_factor": { + "type": "integer", + "default": 500, + "description": "Specify the number of bases to extend reference by (circularmapper only)", + "help_text": "The number of bases to extend the reference genome with. By default this is set to 500 if not specified otherwise.", + "fa_icon": "fas fa-external-link-alt" } }, "fa_icon": "fas fa-layer-group" @@ -1162,18 +1169,6 @@ "snpcapture_bed": { "type": "string", "description": "Path to snp capture in BED format. Provided file can also be gzipped." - }, - "run_circularmapper": { - "type": "boolean", - "default": true, - "fa_icon": "fas fa-check-circle" - }, - "elongation_factor": { - "type": "integer", - "default": 500, - "description": "Specify the number of bases to extend reference by (circularmapper only)", - "help_text": "The number of bases to extend the reference genome with. 
By default this is set to 500 if not specified otherwise.", - "fa_icon": "fas fa-external-link-alt" } }, "fa_icon": "fas fa-search" diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index 1d98dc4e5..73578bda1 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -6,7 +6,7 @@ include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/ci include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' include { BWA_ALN as BWA_ALN_CIRCULARMAPPER } from '../../modules/nf-core/bwa/aln/main' include { BWA_INDEX as BWA_INDEX_CIRCULARMAPPER } from '../../modules/nf-core/bwa/index/main' -include { BWA_SAMSE as BWA_SAMSE_CIRCULARMAPPER } from '../../../modules/nf-core/bwa/samse/main' +include { BWA_SAMSE as BWA_SAMSE_CIRCULARMAPPER } from '../../modules/nf-core/bwa/samse/main' workflow CIRCULARMAPPER { @@ -20,7 +20,6 @@ workflow CIRCULARMAPPER { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - if ( params.run_circularmapper ) { ch_reference = fasta_reference ch_eval = eval @@ -77,5 +76,4 @@ workflow CIRCULARMAPPER { bam = CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] versions = ch_versions // channel: [ path(versions.yml) ] - } } diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 7aa267d37..f738161c5 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -12,6 +12,7 @@ include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM } from '../../modules/nf include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2 } from '../../modules/nf-core/samtools/index/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main' include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main' +include { CIRCULARMAPPER } from '../../subworkflows/local/circularmapper' workflow MAP { take: @@ -113,8 
+114,28 @@ workflow MAP { SAMTOOLS_INDEX_BT2 ( ch_mapped_lane_bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_BT2.out.versions.first()) ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai + + } else if ( params.mapping_tool == 'circularmapper' ) { + ch_eval = params.elongation_factor + + ch_input_for_circularmapper = reads + .combine(index.map{ meta, index, fasta -> [ meta, fasta ] }) + .combine(ch_eval) + .multiMap { + meta, reads, meta2, fasta, eval -> + reads: [ meta, reads ] + index: [ meta2, fasta ] + elon: [ eval ] + } + CIRCULARMAPPER(ch_input_for_circularmapper) + ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions.first() ) + ch_mapped_bam = CIRCULARMAPPER.out.bam + ch_mapped_bai = Channel.empty() // Circularmapper doesn't give a bai + + } + // Only run merge lanes if we have more than one BAM to merge! ch_input_for_lane_merge = ch_mapped_lane_bam .map { From 68f724359e2983cb400afafac395a17504f0c4d0 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 28 Jun 2024 11:45:58 +0200 Subject: [PATCH 04/61] removing local path --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 8678793dc..7193b1a3f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1319,7 +1319,7 @@ }, "contamination_estimation_angsd_hapmap": { "type": "string", - "default": "/Users/judith_ballesteros/Documents/GitHub/eager/assets/angsd_resources/HapMapChrX.gz", + "default": "${projectDir}/assets/angsd_resources/HapMapChrX.gz", "description": "Path to HapMap file of chromosome for contamination estimation..", "help_text": "The haplotype map, or \"HapMap\", records the location of haplotype blocks and their tag SNPs.", "fa_icon": "fas fa-map" From 3fac93d0b28b95081ef32d26ca1f6de9aa93c542 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 28 Jun 2024 12:03:53 +0200 Subject: [PATCH 05/61] fixing schema --- 
nextflow_schema.json | 563 +++++++++++++++++++++---------------------- 1 file changed, 280 insertions(+), 283 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 06737bf47..b59b73117 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,14 +19,14 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.(c|t)sv$", "schema": "assets/schema_input.json", - "description": "Path to tab- or comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a tab- or comma-separated file with 11 columns, and a header row. See [usage docs](https://nf-co.re/eager/usage#samplesheet-input).", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/eager/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "convert_inputbam": { "type": "boolean", "description": "Specify to convert input BAM files back to FASTQ for remapping", - "help_text": "This parameter tells the pipeline to convert the BAM files listed in the `--input` TSV or CSV sheet back to FASTQ format to allow re-preprocessing and mapping.\n\nCan be useful when you want to ensure consistent mapping parameters across all libraries when incorporating public data, however be careful of biases that may come from re-processing again (the BAM files may already be clipped, or only mapped reads with different settings are included so you may not have all reads from the original publication).", + "help_text": "This parameter tells the pipeline to convert the BAM files listed in the `--input` TSV or CSV sheet back to FASTQ format to allow re-preprocessing and mapping\n\nCan be useful when you want to ensure consistent mapping parameters across all libraries when incorporating public data, however be careful of biases that may come from re-processing again (the BAM files may already be clipped, or only mapped reads with different settings are included so you may not have all reads from the original publication).", "fa_icon": "fas fa-undo-alt" }, "outdir": { @@ -62,32 +62,31 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "errorMessage": "The path to the reference FASTA file must not contain spaces and must have file extensions '.fasta', '.fa', '.fas', '.fna', '.fasta.gz','.fa.gz','.fas.gz' or '.fna.gz'.", - "description": "Path to FASTA file of the reference genome.", + "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` or `--fasta_sheet` are not specified. If you don't supply a mapper index (e.g. for BWA), this will be generated for you automatically. 
Combine with `--save_reference` to save mapper index for future runs.", "fa_icon": "far fa-file-code" }, "fasta_fai": { "type": "string", - "description": "Specify path to samtools FASTA index.", + "description": "Path to samtools FASTA index (typically ending in '.fai'). If not supplied will be made for you.", "help_text": "If you want to use a pre-existing `samtools faidx` index, use this to specify the required FASTA index file for the selected reference genome. This should be generated by samtools faidx and has a file suffix of `.fai`.", "fa_icon": "fas fa-address-book" }, "fasta_dict": { "type": "string", - "description": "Specify path to Picard sequence dictionary file.", + "description": "Path to picard sequence dictionary file (typically ending in '.dict'). If not supplied will be made for you.", "help_text": "If you want to use a pre-existing `picard CreateSequenceDictionary` dictionary file, use this to specify the required `.dict` file for the selected reference genome.", "fa_icon": "fas fa-address-book" }, "fasta_mapperindexdir": { "type": "string", - "description": "Specify path to directory containing index files of the FASTA for a given mapper.", - "help_text": "For most people this will likely be the same directory that contains the file you provided to `--fasta`.\n\nIf you want to use pre-existing `bwa index` indices, the directory should contain files ending in '.amb' '.ann' '.bwt'. If you want to use pre-existing `bowtie2 build` indices, the directory should contain files ending in'.1.bt2', '.2.bt2', '.rev.1.bt2'.\n\nIn any case do not include the files themselves in the path. nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa index`/`bowtie2 build` file suffixes. 
If not supplied, the indices will be generated for you.\n\n", - "fa_icon": "fas fa-folder-open" + "description": "Path to directory containing index files of the FASTA for a given mapper.", + "help_text": "For most people this will likely be the same directory that contains the file you provided to `--fasta`.\n\nIf you want to use pre-existing `bwa index` indices, the directory should contain files ending in '.amb' '.ann' '.bwt'. If you want to use pre-existing `bowtie2 build` indices, the directory should contain files ending in'.1.bt2', '.2.bt2', '.rev.1.bt2'. \n\nIn any case do not include the files themselves in the path. nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa index`/`bowtie2 build` file suffixes. If not supplied, the indices will be generated for you.\n\n" }, "save_reference": { "type": "boolean", - "description": "Specify to save any pipeline-generated reference genome indices in the results directory.", - "help_text": "Use this if you do not have pre-made reference FASTA indices for `bwa`, `samtools` and `picard`. If you turn this on, the indices nf-core/eager generates for you and will be saved in the `/results/reference_genomes` for you. If not supplied, nf-core/eager generated index references will be deleted.\n\n> Modifies SAMtools index command: `-c`", + "description": "Specify to save any pipeline-generated reference genome indices in the results directory.", + "help_text": "Use this if you do not have pre-made reference FASTA indices for `bwa`, `samtools` and `picard`. If you turn this on, the indices nf-core/eager generates for you and will be saved in the `/results/reference_genomes` for you. 
If not supplied, nf-core/eager generated index references will be deleted.\n\n> modifies SAMtools index command: `-c`", "fa_icon": "fas fa-save" }, "fasta_sheet": { @@ -126,8 +125,8 @@ }, "fasta_circular_target": { "type": "string", - "description": "Specify the FASTA header of the target chromosome to extend when using `circularmapper`.", - "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`", + "description": "Specify the FASTA header of the target chromosome to extend. Only applies when using `circularmapper`.", + "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`\n", "fa_icon": "fas fa-bullseye" } } @@ -336,7 +335,7 @@ "preprocessing": { "title": "Preprocessing", "type": "object", - "description": "Removal of adapters, paired-end merging, poly-G removal, etc.", + "description": "Removal of adapters, paired-end merging, poly-G removal etc.", "default": "", "properties": { "sequencing_qc_tool": { @@ -349,8 +348,8 @@ }, "skip_preprocessing": { "type": "boolean", - "description": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-G trimming, etc).", - "help_text": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-G trimming etc).\n\nThis will also mean you will only get one set of FastQC results (of the input reads).", + "description": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-g trimming etc).", + "help_text": "Specify to skip all preprocessing steps (adapter 
removal, paired-end merging, poly-g trimming etc).\n\nThis will also mean you will only get one set of FastQC results (of the input reads).", "fa_icon": "fas fa-forward" }, "preprocessing_tool": { @@ -365,18 +364,18 @@ "type": "boolean", "description": "Specify to skip read-pair merging.", "fa_icon": "fas fa-forward", - "help_text": "Turns off the paired-end read merging, and will result in paired-end mapping modes being used during reference of reads again alignment.\n\nThis can be useful in cases where you have long ancient DNA reads, modern DNA or when you want to utilise mate-pair 'spatial' information.\n\n ⚠️ If you run this with --preprocessing_minlength set to a value (as is by default!), you may end up removing single reads from either the pair1 or pair2 file. These reads will be NOT be mapped when aligning with either BWA or bowtie, as both can only accept one (forward) or two (forward and reverse) FASTQs as input in paired-end mode.\n\n> ⚠️ If you run metagenomic screening as well as skipping merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies AdapterRemoval parameter: `--collapse`\n> Modifies fastp parameter: `--merge`" + "help_text": "Turns off the paired-end read merging, and will result in paired-end mapping modes being used during reference of reads again alignment.\n\nThis can be useful in cases where you have long ancient DNA reads, modern DNA, or when you want to utilise mate-pair 'spatial' information..\n\n\u26a0\ufe0f If you run this and also with --preprocessing_minlength set to a value (as is by default!), you may end up removing single reads from either the pair1 or pair2 file. 
These reads will NOT be mapped when aligning with either bwa or bowtie, as both can only accept one (forward) or two (forward and reverse) FASTQs as input in paired-end mode.\n\n> \u26a0\ufe0f If you run metagenomic screening as well as skipping merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies AdapterRemoval parameter: `--collapse`\n> Modifies fastp parameter: `--merge`" }, "preprocessing_excludeunmerged": { "type": "boolean", - "description": "Specify to exclude read-pairs that did not overlap sufficiently for merging (i.e., keep merged reads only).", + "description": "Specify to exclude pairs that did not overlap sufficiently for merging (i.e., keep merged reads only).", "fa_icon": "fas fa-trash-alt", - "help_text": "Specify to exclude read-pairs that did not overlap sufficiently for merging (i.e., keep merged reads only). Singletons (i.e. reads missing a pair) or un-merged reads (where there wasn't sufficient overlap) are discarded.\n\nMost ancient DNA molecules are very short, and the majority are expected to merge. Specifying this parameter can sometimes be useful when dealing with ultra-short aDNA reads to reduce the number of longer-reads you may have in your library that are derived from modern contamination. It can also speed up run time of mapping steps.\n\nYou may want to use this if you want ensure only the best quality reads for your analysis, but with the penalty of potentially losing still valid data (even if some reads have slightly lower quality and/or are longer). It is highly recommended when using 'dedup' deduplication tool." + "help_text": "Specify to exclude pairs that did not overlap sufficiently for merging (i.e., keep merged reads only), in other words singletons (i.e.
reads missing a pair), or un-merged reads (where there wasn't sufficient overlap) are discarded.\n\nMost ancient DNA molecules are very short, and the majority are expected to merge. Specifying this parameter can sometimes be useful when dealing with ultra-short aDNA reads to reduce the number of longer-reads you may have in your library that are derived from modern contamination. It can also speed up run time of mapping steps.\n\nYou may want to use this if you want ensure only the best quality reads for your analysis, but with the penalty of potentially losing still valid data (even if some reads have slightly lower quality and/or are longer). It is highly recommended when using 'dedup' deduplication tool." }, "preprocessing_skipadaptertrim": { "type": "boolean", "description": "Specify to skip removal of adapters.", - "help_text": "Specify to turn off trimming of adapters from reads.\n\nYou may wish to do this if you are using publicly available data, that _should_ have all library artefacts from reads removed.\n\nThis will override any other adapter parameters provided (i.e, `--preprocessing_adapterlist` and `--preprocessing_adapter{1,2}` will be ignored)!\n\n> Modifies AdapterRemoval parameter: `--adapter1` and `--adapter2` (sets both to an empty string)\n> Applies fastp parameter: `--disable_adapter_trimming`", + "help_text": "Specify to turn off trimming of adapters from reads.\n\nYou may wish to do this if you are using public data (e.g. 
ENA, SRA), that _should_ have all library artefacts removed from reads.\n\nThis will override any other adapter parameters provided (i.e, `--preprocessing_adapterlist` and/or `--preprocessing_adapter{1,2}` will be ignored)!\n\n> Modifies AdapterRemoval parameter: `--adapter1` and `--adapter2` (sets both to an empty string)\n> Applies fastp parameter: `--disable_adapter_trimming`", "fa_icon": "fas fa-forward" }, "preprocessing_adapter1": { @@ -387,93 +386,93 @@ }, "preprocessing_adapter2": { "type": "string", - "description": "Specify the nucleotide sequence for the reverse read/R2.", + "description": "Specify the nucleotide sequence for the reverse read/R2.", "fa_icon": "fas fa-grip-lines", "help_text": "Specify a nucleotide sequence for the forward read/R2.\n\nIf not modified by the user, the default for the particular preprocessing tool will be used. To turn off adapter trimming use `--preprocessing_skipadaptertrim`.\n\n> Modifies AdapterRemoval parameter: `--adapter2`\n> Modifies fastp parameter: `--adapter_sequence_r2`" }, "preprocessing_adapterlist": { "type": "string", - "description": "Specify a list of all possible adapters to trim.", - "help_text": "Specify a file with a list of adapter (combinations) to remove from all files.\n\nOverrides the `--preprocessing_adapter1`/`--preprocessing_adapter2` parameters.\n\nNote that the two tools have slightly different behaviours.\n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. Only Adapters in this list will be screened for and removed. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped.
fastp will first perform auto-detection and removal of adapters and then _additionally_ remove adapters present in the FASTA file one by one will.\n\n> Modifies AdapterRemoval parameter: `--adapter-list`\n> Modifies fastp parameter: `--adapter_fasta`", + "description": "Specify a list of all possible adapters to trim. Overrides --preprocessing_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta (fastp).", + "help_text": "Specify a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the `--preprocessing_adapter1`/`--preprocessing_adapter2` parameters. \n\nNote that the two tools have slightly different behaviours.\n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. Only Adapters in this list will be screened for and removed. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp will first perform auto-detection and removal of adapters, and then _additionally_ adapters present in the FASTA file will be removed one by one.\n\n> Modifies AdapterRemoval parameter: `--adapter-list`\n> Modifies fastp parameter: `--adapter_fasta`", "fa_icon": "fas fa-list" }, "preprocessing_minlength": { "type": "integer", "default": 25, "description": "Specify the minimum length reads must have to be retained.", - "help_text": "Specify the minimum length reads must have to be retained.\n\nReads smaller than this length after trimming are discarded and not included in downstream analyses.
Typically in ancient DNA, users will set this to 30 or for very old samples around 25 bp - reads any shorter that this often are not specific enough to provide useful information.\n\n> Modifies AdapterRemoval parameter: `--minlength`\n> Modifies fastp parameter: `--length_required`", - "fa_icon": "fas fa-ruler-horizontal" + "help_text": "Specify the minimum length reads must have to be retained. \n\nReads smaller than this length after trimming are discarded and not included in downstream analyses. Typically in ancient DNA users will set this to 30 or for very old samples around 25 bp - reads any shorter that this often are not specific enough to provide useful information.\n\n> Modifies AdapterRemoval parameter: `--minlength`\n> Modifies fastp parameter: `--length_required`", + "fa_icon": "fas fa-ruler" }, "preprocessing_trim5p": { "type": "integer", "default": 0, - "description": "Specify number of bases to hard-trim from 5 prime or front of reads.", - "help_text": "Specify number of bases to hard-trim from 5 prime or front of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n ⚠️ When this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior to mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie 2's inbuilt pre-mapping read-end trimming functionality. 
Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 5p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. Therefore, this is more suitable for hard-removal of damage before mapping (however the Bowtie 2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim5p`\n> Modifies fastp parameters: `--trim_front1` and/or `--trim_front2`", + "description": "Specify number of bases to hard-trim from 5 prime or front of reads. Exact behaviour varies per tool, see documentation.", + "help_text": "Specify number of bases to hard-trim from 5 prime or front of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n\u26a0\ufe0f when this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: this 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 5p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. 
Therefore this is more suitable for hard-removal of damage prior mapping (however the Bowtie2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim5p`\n> Modifies fastp parameters: `--trim_front1` and/or `--trim_front2`\n", "fa_icon": "fas fa-cut" }, "preprocessing_trim3p": { "type": "integer", "default": 0, - "description": "Specify number of bases to hard-trim from 3 prime or tail of reads.", + "description": "Specify number of bases to hard-trim from 3 prime or tail of reads. Exact behaviour varies per tool, see documentation.", "fa_icon": "fas fa-cut", - "help_text": "Specify number of bases to hard-trim from 3 prime or tail of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n⚠️ When this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior to mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie 2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 3p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. 
Therefore this is more suitable for hard-removal of damage before mapping (however the Bowtie 2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim3p`\n> Modifies fastp parameters: `--trim_tail1` and/or `--trim_tail2`" + "help_text": "Specify number of bases to hard-trim from 3 prime or tail of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n\u26a0\ufe0f when this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: this 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 3p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. 
Therefore this is more suitable for hard-removal of damage prior mapping (however the Bowtie2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim3p`\n> Modifies fastp parameters: `--trim_tail1` and/or `--trim_tail2`\n" }, "preprocessing_savepreprocessedreads": { "type": "boolean", "description": "Specify to save the preprocessed reads in the results directory.", "fa_icon": "fas fa-save", - "help_text": "Specify to save the preprocessed reads in FASTQ format the results directory.\n\nThis can be useful for re-analysing FASTQ files manually, or uploading to public data repositories such as ENA/SRA (provided you don't filter by length or merge paired reads)." + "help_text": "Specify to save the preprocessed reads in FASTQ format the results directory. \n\nThis can be useful for re-analysing in FASTQ files manually, or uploading to public data repositories such as ENA/SRA (provided you don't do length filtering nor merging)." }, "preprocessing_fastp_complexityfilter": { "type": "boolean", - "description": "Specify to turn on sequence complexity filtering of reads.", + "description": "Specify to turn on sequence complexity filtering of reads with fastp.", "help_text": "Performs a poly-G tail removal step in the beginning of the pipeline using fastp.\n\nThis can be useful for trimming ploy-G tails from short-fragments sequenced on two-colour Illumina chemistry such as NextSeqs or NovaSeqs (where no-fluorescence is read as a G on two-colour chemistry), which can inflate reported GC content values.\n\n> Modifies fastp parameter: `--trim_poly_g`", - "fa_icon": "fas fa-power-off" + "fa_icon": "fas fa-cut" }, "preprocessing_fastp_complexityfilter_threshold": { "type": "integer", "default": 10, "description": "Specify the complexity threshold that must be reached or exceeded to retain reads.", "help_text": "This option can be used to define the minimum length of a poly-G tail to begin low complexity trimming.\n\n> Modifies fastp parameter: 
`--poly_g_min_len`", - "fa_icon": "fas fa-filter" + "fa_icon": "fas fa-ruler" }, "preprocessing_adapterremoval_preserve5p": { "type": "boolean", - "description": "Skip AdapterRemoval quality and N base trimming at 5 prime end.", + "description": "Skip AdapterRemoval base trimming (n, quality) of 5 prime end.", "help_text": "Turns off quality based trimming at the 5p end of reads when any of the AdapterRemoval quality or N trimming options are used. Only 3p end of reads will be removed.\n\nThis also entirely disables quality based trimming of collapsed reads, since both ends of these are informative for PCR duplicate filtering. For more information see the AdapterRemoval [documentation](https://adapterremoval.readthedocs.io/en/stable/manpage.html#cmdoption-adapterremoval-preserve5p).\n\n> Modifies AdapterRemoval parameters: `--preserve5p`", "fa_icon": "fas fa-shield-alt" }, "preprocessing_adapterremoval_skipqualitytrimming": { "type": "boolean", - "description": "Specify to skip AdapterRemoval quality and N trimming at the ends of reads.", - "help_text": "Turns off AdapterRemoval quality trimming from ends of reads.\n\nThis can be useful to reduce runtime when running public data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimqualities` ", + "description": "Skip AdapterRemoval quality and N trimming from ends of reads.", + "help_text": "Turns off AdapterRemoval quality trimming from ends of reads. \n\nThis can be useful to reduce runtime when running public data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimqualities` ", "fa_icon": "fas fa-forward" }, "preprocessing_adapterremoval_trimbasequalitymin": { "type": "integer", "default": 20, "description": "Specify AdapterRemoval minimum base quality for trimming off bases.", - "help_text": "Defines the minimum read quality per base that is required for a base to be kept by AdapterRemoval. 
Individual bases at the ends of reads falling below this threshold will be clipped off.\n\n> Modifies AdapterRemoval parameter: `--minquality`", - "fa_icon": "fas fa-filter" + "help_text": "Defines the minimum read quality per base that is required for a base to be kept. Individual bases at the ends of reads falling below this threshold will be clipped off.\n\n> Modifies AdapterRemoval parameter: `--minquality`", + "fa_icon": "fas fa-ruler-vertical" }, "preprocessing_adapterremoval_skipntrimming": { "type": "boolean", - "description": "Specify to skip AdapterRemoval N trimming (quality trimming only).", - "help_text": "Turns off AdapterRemoval N trimming from ends of reads.\n\nThis can be useful to reduce runtime when running publicly available data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimns` ", + "description": "Skip AdapterRemoval N trimming (quality trimming only).", + "help_text": "Turns off AdapterRemoval N trimming from ends of reads. \n\nThis can be useful to reduce runtime when running public data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimns` ", "fa_icon": "fas fa-forward" }, "preprocessing_adapterremoval_adapteroverlap": { "type": "integer", "default": 1, "description": "Specify the AdapterRemoval minimum adapter overlap required for trimming.", - "fa_icon": "fas fa-filter", + "fa_icon": "fas fa-ruler-horizontal", "help_text": "Specifies a minimum number of bases that overlap with the adapter sequence before AdapterRemoval trims adapters sequences from reads.\n\n> Modifies AdapterRemoval parameter: `--minadapteroverlap`" }, "preprocessing_adapterremoval_qualitymax": { "type": "integer", "default": 41, - "description": "Specify the AdapterRemoval maximum Phred score used in input FASTQ files.", - "help_text": "Specify maximum Phred score of the quality field of FASTQ files.\n\nThe quality-score range can vary depending on the machine and version (e.g. 
see diagram [here](https://en.wikipedia.org/wiki/FASTQ_format#Encoding), and this allows you to increase from the default AdapterRemoval value of 41.\n\nNote that while this can theoretically provide you with more confident and precise base call information, many downstream tools only accept FASTQ files with Phred scores limited to a max of 41, and therefore increasing the default for this parameter may make the resulting preprocessed files incompatible with some downstream tools.\n\n> Modifies AdapterRemoval parameters: `--qualitymax`", + "description": "Specify the AdapterRemoval maximum Phred score used in input FASTQ files", + "help_text": "Specify maximum Phred score of the quality field of FASTQ files. \n\nThe quality-score range can vary depending on the machine and version (e.g. see diagram [here](https://en.wikipedia.org/wiki/FASTQ_format#Encoding), and this allows you to increase from the default AdapterRemoval value of 41. \n\nNote that while this theoretically can provide you with more confident and precise base call information, many downstream tools only accept FASTQ files with Phred scores limited to a max of 41, and therefore increasing the default for this parameter may make the resulting preprocessed files incompatible with some downstream tools.\n\n> Modifies AdapterRemoval parameters: `--qualitymax`", "fa_icon": "fas fa-tachometer-alt" } }, @@ -487,9 +486,9 @@ "properties": { "run_fastq_sharding": { "type": "boolean", - "description": "Specify to turn on FASTQ sharding.", + "description": "Turn on FastQ sharding.", "fa_icon": "fas fa-power-off", - "help_text": "Sharding will split the FASTQs into smaller chunks before mapping. These chunks are then mapped in parallel. This approach can speed up the mapping process for larger FASTQ files." + "help_text": "Sharding will split the FastQs into smaller chunks before mapping. These chunks are then mapped in parallel. This approach can speed up the mapping process for larger FastQ files." 
}, "fastq_shard_size": { "type": "integer", @@ -503,99 +502,99 @@ "default": "bwaaln", "enum": ["bwaaln", "bwamem", "bowtie2", "circularmapper"], "description": "Specify which mapper to use.", - "help_text": "Specify which mapping tool to use. Options are BWA aln ('`bwaaln`'), BWA mem ('`bwamem`'), circularmapper ('`circularmapper`'), or Bowtie 2 ('`bowtie2`'). BWA aln is the default and highly suited for short-read ancient DNA. BWA mem can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. CircularMapper enhances the mapping procedure to circular references, using the BWA algorithm but utilizing an extend-remap procedure (see [Peltzer et al 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). Bowtie 2 is similar to BWA aln, and has recently been suggested to provide slightly better results under certain conditions ([Poullet and Orlando 2020](https://doi.org/10.3389/fevo.2020.00105)), as well as providing extra functionality (such as FASTQ trimming).\n\nMore documentation can be seen for each tool under:\n\n- [BWA aln](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [BWA mem](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [CircularMapper](https://circularmapper.readthedocs.io/en/latest/contents/userguide.html)\n- [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line)", - "fa_icon": "fas fa-hammer" + "help_text": "Specify which mapping tool to use. Options are BWA aln ('`bwaaln`'), BWA mem ('`bwamem`'), circularmapper ('`circularmapper`'), or bowtie2 ('`bowtie2`'). BWA aln is the default and highly suited for short-read ancient DNA. BWA mem can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. CircularMapper enhances the mapping procedure to circular references, using the BWA algorithm but utilizing a extend-remap procedure (see Peltzer et al 2016, Genome Biology for details). 
Bowtie2 is similar to BWA aln, and has recently been suggested to provide slightly better results under certain conditions ([Poullet and Orlando 2020](https://doi.org/10.3389/fevo.2020.00105)), as well as providing extra functionality (such as FASTQ trimming).\n\nMore documentation can be seen for each tool under:\n\n- [BWA aln](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [BWA mem](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [CircularMapper](https://circularmapper.readthedocs.io/en/latest/contents/userguide.html)\n- [Bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line)", + "fa_icon": "fas fa-layer-group" }, "fasta_largeref": { "type": "boolean", - "description": "Specify to generate '.csi' BAM indices instead of '.bai' for larger reference genomes.", - "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). This parameter is not required for smaller references (including the human reference genomes hg19 or grch37/grch38).", + "description": "Specify to generate more recent '.csi' BAM indices. If your reference genome is larger than 3.5GB, this is recommended due to more efficient data handling with the '.csi' format over the older '.bai'.", + "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). 
This parameter is not required for smaller references (including the human hg19 or grch37/grch38 references), but >4GB genomes have been shown to need `.csi` indices.", "fa_icon": "fas fa-address-book" }, "mapping_bwaaln_n": { "type": "number", "default": 0.01, - "description": "Specify the amount of allowed mismatches in the alignment for mapping with BWA aln.", - "help_text": "Specify how many mismatches are allowed in a read during alignment with BWA aln. Default is set following recommendations from [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who compared alignment to human reference genomes.\n\nIf you're uncertain what value to use, check out this [Shiny App](https://apeltzer.shinyapps.io/bwa-mismatches/) for more information.\n\n> Modifies BWA aln parameter: `-n`", + "description": "Specify the -n parameter for BWA aln, i.e. amount of allowed mismatches in the alignment.", + "help_text": "Configures the `bwa aln -n` parameter, defining how many mismatches are allowed in a read. Default is set following recommendations from [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who tested when aligning to human reference genomes. \n\nIf you're uncertain what to set check out this [Shiny App](https://apeltzer.shinyapps.io/bwa-mismatches/) for more information on how to set this parameter efficiently.\n\n> Modifies bwa aln parameter: `-n`", "fa_icon": "fas fa-sort-numeric-down" }, "mapping_bwaaln_k": { "type": "integer", "default": 2, - "description": "Specify the maximum edit distance allowed in a seed for mapping with BWA aln.", - "help_text": "Specify the maximum edit distance during the seeding phase of the BWA aln mapping algorithm.\n\n> Modifies BWA aln parameter: `-k`", + "description": "Specify the -k parameter for BWA aln, i.e. 
maximum edit distance allowed in a seed.", + "help_text": "Configures the bwa aln `-k` parameter for the maximum edit distance during the seeding phase of the mapping algorithm.\n\n> Modifies BWA aln parameter: `-k`", "fa_icon": "fas fa-people-arrows" }, "mapping_bwaaln_l": { "type": "integer", "default": 1024, - "description": "Specify the length of seeds to be used for BWA aln.", - "help_text": "Specify the length of the seed used in BWA aln. Default is set to be 'turned off' at the recommendation of [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who tested when aligning to human reference genomes. Seeding is 'turned off' by specifying an arbitrarily long number to force the entire read to act as the seed.\n\nNote: Despite being recommended, turning off seeding can result in long runtimes!\n\n> Modifies BWA aln parameter: `-l`", + "description": "Specify the -l parameter for BWA aln i.e. the length of seeds to be used.", + "help_text": "Configures the length of the seed used in bwa aln `-l`. Default is set to be 'turned off' at the recommendation of [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who tested when aligning to human reference genomes. Seeding is 'turned off' by specifying an arbitrarily long number to force the entire read to act as the seed. \n\nNote: Despite being recommended, turning off seeding can result in long runtimes!\n\n> Modifies BWA aln parameter: `-l`", "fa_icon": "fas fa-ruler-horizontal" }, "mapping_bwaaln_o": { "type": "integer", "default": 2, - "description": "Specify the number of gaps allowed for alignment with BWA aln.", - "help_text": "Specify the number of gaps allowed for mapping with BWA aln. Default is set to BWA default.\n\n> Modifies BWA aln parameter: `-o`", + "description": "Specify the -o parameter for BWA aln i.e. the number of gaps allowed.", + "help_text": "Configures the number of gaps used in bwa aln. 
Default is set to bwa default.\n\n> Modifies BWA aln parameter: `-o`", "fa_icon": "fas fa-people-arrows" }, "mapping_bwamem_k": { "type": "integer", "default": 19, - "description": "Specify the minimum seed length for alignment with BWA mem.", - "help_text": "Configures the minimum seed length used in BWA mem. Default is set to BWA default.\n\n> Modifies BWA mem parameter: `-k`", + "description": "Specify the -k parameter for BWA mem i.e. the minimum seed length", + "help_text": "Configures the minimum seed length used in BWA-MEM. Default is set to BWA default.\n\n> Modifies BWA-MEM parameter: `-k`", "fa_icon": "fas fa-seedling" }, "mapping_bwamem_r": { "type": "number", "default": 1.5, - "description": "Specify the re-seeding threshold for alignment with BWA mem.", - "help_text": "Configures the re-seeding threshold used in BWA mem. Default is set to BWA default.\n\n> Modifies BWA mem parameter: `-r`", + "description": "Specify the -r parameter for BWA mem i.e. the re-seeding threshold", + "help_text": "Configures the re-seeding threshold used in BWA-MEM. Default is set to BWA default.\n\n> Modifies BWA-MEM parameter: `-r`", "fa_icon": "fas fa-angle-double-left" }, "mapping_bowtie2_alignmode": { "type": "string", "default": "local", - "description": "Specify the Bowtie 2 alignment mode.", - "help_text": "Specify the type of read alignment to use with Bowtie 2. 'Local' allows only partial alignment of read with ends of reads possibly 'soft-clipped' (i.e. remain unaligned/ignored), if the soft-clipped alignment provides best alignment score. 'End-to-end' requires all nucleotides to be aligned.\nDefault is set following [Cahill et al (2018)](https://doi.org/10.1093/molbev/msy018) and [Poullet and Orlando 2020](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full)\n\n> Modifies Bowtie 2 presets: `--local`, `--end-to-end`", - "fa_icon": "fas fa-toggle-on", + "description": "Specify the bowtie2 alignment mode.", + "help_text": "The type of read alignment to use.
Local allows only partial alignment of read, with ends of reads possibly 'soft-clipped' (i.e. remain unaligned/ignored), if the soft-clipped alignment provides best alignment score. End-to-end requires all nucleotides to be aligned. \nDefault is set following [Cahill et al (2018)](https://doi.org/10.1093/molbev/msy018) and [Poullet and Orlando 2020](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full)\n\n> Modifies Bowtie2 presets: `--local`, `--end-to-end`", + "fa_icon": "fas fa-arrows-alt-h", "enum": ["local", "end-to-end"] }, "mapping_bowtie2_sensitivity": { "type": "string", "default": "sensitive", - "description": "Specify the level of sensitivity for the Bowtie 2 alignment mode.", - "help_text": "Specify the Bowtie 2 'preset' to use. These strings apply to both `--mapping_bowtie2_alignmode` options. See the Bowtie 2 manual for actual settings.\nDefault is set following [Poullet and Orlando (2020)](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full), when running damaged-data without UDG treatment.\n\n> Modifies the Bowtie 2 parameters: `--fast`, `--very-fast`, `--sensitive`, `--very-sensitive`, `--fast-local`, `--very-fast-local`, `--sensitive-local`, `--very-sensitive-local`", + "description": "Specify the level of sensitivity for the bowtie2 alignment mode.", + "help_text": "The Bowtie2 'preset' to use. These strings apply to both --mapping_bowtie2_alignmode options. See the Bowtie2 manual for actual settings. 
\nDefault is set following [Poullet and Orlando (2020)](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full), when running damaged-data without UDG treatment.\n\n> Modifies the Bowtie2 parameters: `--fast`, `--very-fast`, `--sensitive`, `--very-sensitive`, `--fast-local`, `--very-fast-local`, `--sensitive-local`, `--very-sensitive-local`", "fa_icon": "fas fa-microscope", "enum": ["fast", "very-fast", "sensitive", "very-sensitive"] }, "mapping_bowtie2_n": { "type": "integer", "default": 0, - "description": "Specify the number of mismatches in seed for alignment with Bowtie 2.", - "help_text": "Specify the number of mismatches allowed in the seed during seed-and-extend procedure of Bowtie 2. This will override any values set with `--mapping_bowtie2_sensitivity`. Can either be 0 or 1.\n\n> Modifies Bowtie 2 parameter: `-N`", + "description": "Specify the -N parameter for bowtie2 (mismatches in seed). This will override defaults from alignmode/sensitivity.", + "help_text": "The number of mismatches allowed in the seed during seed-and-extend procedure of Bowtie2. This will override any values set with --mapping_bowtie2_sensitivity. Can either be 0 or 1.\n\n> Modifies Bowtie2 parameter: `-N`", "fa_icon": "fas fa-sort-numeric-down" }, "mapping_bowtie2_l": { "type": "integer", "default": 20, - "description": "Specify the length of seed substrings for Bowtie 2.", - "help_text": "Specify the length of the seed sub-string to use during seeding of Bowtie 2. This will override any values set with `--mapping_bowtie2_sensitivity`.\n\n> Modifies Bowtie 2 parameter: `-L`", + "description": "Specify the -L parameter for bowtie2 (length of seed substrings). This will override defaults from alignmode/sensitivity.", + "help_text": "The length of the seed sub-string to use during seeding.
This will override any values set with --mapping_bowtie2_sensitivity.\n\n> Modifies Bowtie2 parameter: `-L`", "fa_icon": "fas fa-ruler-horizontal" }, "mapping_bowtie2_trim5": { "type": "integer", "default": 0, - "description": "Specify the number of bases to trim off from 5 prime end of read before alignment with Bowtie 2.", - "help_text": "Specify the number of bases to trim at the 5' (left) end of read before alignment with Bowtie 2. This may be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie 2 parameter: `--trim5`", + "description": "Specify number of bases to trim off from 5' (left) end of read before alignment.", + "help_text": "Number of bases to trim at the 5' (left) end of read prior to alignment. May be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie2 parameter: `--trim5`", "fa_icon": "fas fa-cut" }, "mapping_bowtie2_trim3": { "type": "integer", "default": 0, - "description": "Specify the number of bases to trim off from 3 prime end of read before alignment with Bowtie 2.", - "help_text": "Specify the number of bases to trim at the 3' (right) end of read before alignment with Bowtie 2. This may be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie 2 parameter: `--trim3`", + "description": "Specify number of bases to trim off from 3' (right) end of read before alignment.", + "help_text": "Number of bases to trim at the 3' (right) end of read prior to alignment. May be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie2 parameter: `--trim3`", "fa_icon": "fas fa-cut" }, "mapping_bowtie2_maxins": { @@ -604,13 +603,7 @@ "description": "Specify the maximum fragment length for Bowtie2 paired-end mapping mode only.", "help_text": "The maximum fragment for valid paired-end alignments. Only for paired-end mapping (i.e.
unmerged), and therefore typically only useful for modern data.\n\n> Modifies Bowtie2 parameter: `--maxins`", "fa_icon": "fas fa-exchange-alt" - }, - "elongation_factor": { - "type": "integer", - "default": 500, - "description": "Specify the number of bases to extend reference by (circularmapper only)", - "help_text": "The number of bases to extend the reference genome with. By default this is set to 500 if not specified otherwise.", - "fa_icon": "fas fa-external-link-alt" + } }, "fa_icon": "fas fa-layer-group" }, @@ -622,54 +615,54 @@ "properties": { "run_bamfiltering": { "type": "boolean", - "description": "Specify to turn on filtering of reads in BAM files after mapping. By default, only mapped reads retained.", + "description": "Turn on filtering of reads in BAM files after mapping. By default, only mapped reads retained.", "fa_icon": "fas fa-power-off", - "help_text": "Specify to turns on the filtering subworkflow for mapped BAM files after the read alignment step. Filtering includes removal of unmapped reads, length filtering, and mapping quality filtering.\n\nWhen turning on BAM filtering, by default only the mapped/unmapped filter is activated, thus only mapped reads are retained for downstream analyses. See `--bamfiltering_retainunmappedgenomicbam` to retain unmapped reads, if filtering only for length and/or quality is preferred.\n\nNote this subworkflow can also be activated if `--run_metagenomic_screening` is supplied." + "help_text": "Turns on the filtering subworkflow for mapped BAM files coming out of the read alignment step. Filtering includes removal of unmapped reads, length filtering, and mapping quality filtering.\n\nWhen turning on bam filtering, by default only the mapped/unmapped filter is activated, thus only mapped reads are retained for downstream analyses. 
See `--bamfiltering_retainunmappedgenomicbam` to retain unmapped reads, if filtering only for length and/or quality is preferred.\n\nNote this subworkflow can also be activated if `--run_metagenomic_screening` is supplied." }, "bamfiltering_minreadlength": { "type": "integer", "default": 0, "description": "Specify the minimum read length mapped reads should have for downstream genomic analysis.", - "help_text": "Specify to remove mapped reads that fall below a certain length threshold after mapping.\n\nThis can be useful to get more realistic 'endogenous DNA' or 'on target read' percentages.\n\nIf used _instead of_ minimum length read filtering at AdapterRemoval, you can get more more realistic endogenous DNA estimates when most of your reads are very short (e.g. in single-stranded libraries or samples with highly degraded DNA). In these cases, the default minimum length filter at earlier adapter clipping/read merging will remove a very large amount of your reads in your library (including valid reads), thus making an artificially small denominator for a typical endogenous DNA calculation.\n\nTherefore by retaining all of your reads until _after_ mapping (i.e., turning off the adapter clipping/read merging filter), you can generate more 'real' endogenous DNA estimates immediately after mapping (with a better denominator). Then after estimating this, filter using this parameter to retain only 'useful' reads (i.e., those long enough to provide higher confidence of their mapped position) for downstream analyses.\n\nBy specifying `0`, no length filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). 
See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies filter_bam_fragment_length.py parameter: `-l`", - "fa_icon": "fas fa-filter" + "help_text": "You can use this to remove mapped reads that fall below a certain length after mapping.\n\nThis can be useful to get more realistic 'endogenous DNA' or 'on target read' percentages.\n\nIf used _instead of_ minimum length read filtering at AdapterRemoval, you can get more more realistic endogenous DNA estimates when most of your reads are very short (e.g. in single-stranded libraries or samples with highly degraded DNA). In these cases, the default minimum length filter at earlier adapter clipping/read merging will remove a very large amount of your reads in your library (including valid reads), thus making an artificially small denominator for a typical endogenous DNA calculation. \n\nTherefore by retaining all of your reads until _after_ mapping (i.e., turning off the adapter clipping/read merging filter), you can generate more 'real' endogenous DNA estimates immediately after mapping (with a better denominator). Then after estimating this, filter using this parameter to retain only 'useful' reads (i.e., those long enough to provide higher confidence of their mapped position) for downstream analyses.\n\nBy specifying `0`, no length filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). 
See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies tool parameter(s):\n> - filter_bam_fragment_length.py: `-l`", + "fa_icon": "fas fa-ruler-horizontal" }, "bamfiltering_mappingquality": { "type": "integer", "default": 0, "description": "Specify the minimum mapping quality reads should have for downstream genomic analysis.", - "help_text": "Specify a mapping quality threshold for mapped reads to be kept for downstream analysis.\n\nBy default all reads are retained and this option is therefore set to 0 to ensure no quality filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies samtools view parameter: `-q`", - "fa_icon": "fas fa-filter" + "help_text": "Specify a mapping quality threshold for mapped reads to be kept for downstream analysis.\n\nBy default all reads are retained and is therefore set to 0 to ensure no quality filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). 
See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies tool parameter(s):\n> - samtools view: `-q`", + "fa_icon": "fas fa-thermometer-full" }, "bamfilter_genomicbamfilterflag": { "type": "integer", "default": 4, "fa_icon": "fas fa-flag", - "description": "Specify the SAM format flag of reads to remove during BAM filtering for downstream genomic steps.", - "help_text": "Specify to customise the exact SAM format flag of reads you wish to _remove_ from your BAM file to for downstream _genomic_ analyses.\n\nYou can explore more using a tool from the Broad Institute [here](https://broadinstitute.github.io/picard/explain-flags.html)\n\n> ⚠️ Modify at your own risk, alternative flags are not necessarily supported in downstream steps!\n\n> Modifies samtools parameter: `-F`" + "description": "Specify the SAM format flag of reads to remove during BAM filtering for downstream genomic steps. Generally not recommended to change.", + "help_text": "You can use this to customise the exact SAM format flag of reads you wish to _remove_ from your BAM file for downstream _genomic_ analyses.\n\nYou can explore more using a tool from the Broad Institute [here](https://broadinstitute.github.io/picard/explain-flags.html)\n\n> \u26a0\ufe0f Modify at your own risk, alternative flags are not necessarily supported in downstream steps!\n\n> Modifies tool parameter(s):\n> - SAMtools: `-F`" }, "bamfiltering_retainunmappedgenomicbam": { "type": "boolean", "description": "Specify to retain unmapped reads in the BAM file used for downstream genomic analyses.", - "help_text": "Specify to retain unmapped reads (optionally also length-filtered) in the genomic BAM for downstream analysis.
By default, the pipeline only keeps mapped reads for downstream analysis.\n\nThis is also turned on if `--metagenomicscreening_input` is set to `all`.\n\n> ⚠️ This will likely slow down run time of downstream pipeline steps!\n\n> Modifies samtools view parameters: `-f 4` / `-F 4`", + "help_text": "You can use this parameter to retain unmapped reads (optionally also length filtered) in the genomic BAM for downstream analysis. By default, the pipeline only keeps mapped reads for downstream analysis.\n\nThis is also turned on if `--metagenomicscreening_input` is set to `all`.\n\n> \u26a0\ufe0f This will likely slow down run time of downstream pipeline steps!\n\n> Modifies tool parameter(s):\n> - samtools view: `-f 4` / `-F 4`", "fa_icon": "fas fa-piggy-bank" }, "bamfiltering_generateunmappedfastq": { "type": "boolean", - "description": "Specify to generate FASTQ files containing only unmapped reads from the aligner generated BAM files.", - "help_text": "Specify to turn on the generation and saving of FASTQs of only the unmapped reads from the mapping step in the results directory.\n\nThis can be useful if you wish to do other analysis of the unmapped reads independently of the pipeline.\n\nNote: the reads in these FASTQ files have _not_ undergone length of quality filtering\n\n> Modifies samtools fastq parameter: `-f 4`", + "description": "Generate FASTQ files containing only unmapped reads from the aligner generated BAM files.", + "help_text": "This turns on the generation and saving of FASTQs of only the unmapped reads from the mapping step in the results directory, using `samtools fastq`.\n\nThis could be useful if you wish to do other analysis of the unmapped reads independently of the pipeline.\n\nNote: the reads in these FASTQ files have _not_ undergone length of quality filtering\n\n> Modifies tool parameter(s):\n> - samtools fastq: `-f 4`", "fa_icon": "fas fa-file-alt" }, "bamfiltering_generatemappedfastq": { "type": "boolean", - "description": "Specify to 
generate FASTQ files containing only mapped reads from the aligner generated BAM files.", - "help_text": "Specify to turn on the generation and saving of FASTQs of only the mapped reads from the mapping step in the results directory.\n\nThis can be useful if you wish to do other analysis of the mapped reads independently of the pipeline, such as remapping with different parameters (whereby only including mapped reads will speed up computation time during the re-mapping due to reduced input data).\n\nNote the reads in these FASTQ files have _not_ undergone length of quality filtering\n\n> Modifies samtools fastq parameter: `-F 4`", + "description": "Generate FASTQ files containing only mapped reads from the aligner generated BAM files.", + "help_text": "This turns on the generation and saving of FASTQs of only the mapped reads from the mapping step in the results directory, using `samtools fastq`.\n\nThis could be useful if you wish to do other analysis of the mapped reads independently of the pipeline, such as remapping with different parameters (whereby only including mapped reads will speed up computation time during the re-mapping due to reduced input data).\n\nNote the reads in these FASTQ files have _not_ undergone length or quality filtering\n\n> Modifies tool parameter(s):\n> - samtools fastq: `-F 4`", "fa_icon": "far fa-file-alt" }, "bamfiltering_savefilteredbams": { "type": "boolean", - "description": "Specify to save the intermediate filtered genomic BAM files in the results directory.", - "help_text": "Specify to save intermediate length- and/or quality-filtered genomic BAM files in the results directory.", - "fa_icon": "fas fa-save" + "description": "Save in the results directory the intermediate filtered genomic BAM files that are sent for downstream genomic analyses.", + "help_text": "This saves intermediate length and/or quality filtered genomic BAM files in the results directory.", + "fa_icon": "far fa-save" } }, "fa_icon": "fas fa-filter" @@
-677,64 +670,64 @@ "metagenomics": { "title": "Metagenomics", "type": "object", - "description": "Options related to metagenomic screening.", + "description": "Options related to metagenomic screening.", "default": "", "properties": { "run_metagenomicscreening": { "type": "boolean", - "description": "Specify to turn on metagenomic screening of mapped, unmapped or all reads.", + "description": "Turn on metagenomic screening of mapped, unmapped, or all reads.", "fa_icon": "fas fa-power-off", - "help_text": "Specify to turn on the metagenomic screening subworkflow of the pipeline, where reads are screened against large databases. Typically used for pathogen screening or microbial community analysis.\n\nIf supplied, this will also turn on the BAM filtering subworkflow of the pipeline." + "help_text": "Turns on the metagenomic screening subworkflow of the pipeline, where reads are screened against large databases. Typically used for pathogen screening or microbial community analysis.\n\nIf supplied, this will also turn on the BAM filtering subworkflow of the pipeline." }, "metagenomicscreening_input": { "type": "string", "default": "unmapped", - "description": "Specify which type of reads to use for metagenomic screening.", + "description": "Specify which type of reads to go into metagenomic screening.", "enum": ["unmapped", "mapped", "all"], "fa_icon": "fas fa-hand-pointer", - "help_text": "Specify to select which mapped reads will be sent for metagenomic analysis.\n\nThis influences which reads are sent to this step, whether you want unmapped reads (used in most cases, as 'host reads' can often be contaminants in microbial genomes), mapped reads (e.g, when doing competitive against a genomic reference of multiple genomes and which to apply LCA correction) or all reads.\n\n> ⚠️ If you skip paired-end merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one.
This merged file is _not_ saved in results directory.\n\n> Modifies samtools fastq parameters: `-f 4` / `-F 4`" + "help_text": "You can select which reads coming out of the read alignment step will be sent for metagenomic analysis.\n\nThis influences which reads are sent to this step, whether you want unmapped reads (used in most cases, as 'host reads' can often be contaminants in microbial genomes), mapped reads (e.g., when doing competitive mapping against a genomic reference of multiple genomes and wish to apply LCA correction), or all reads.\n\n> \u26a0\ufe0f If you skip paired-end merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies tool parameter(s):\n> - samtools fastq: `-f 4` / `-F 4`" }, "run_metagenomics_complexityfiltering": { "type": "boolean", "fa_icon": "fas fa-power-off", - "help_text": "Specify to turn on a subworkflow of the pipeline that filters the FASTQ files for complexity before the metagenomics profiling.\nUse the metagenomics_complexity_tool parameter to select a method.", - "description": "Specify to run a complexity filter on the metagenomics input files before classification." + "help_text": "Turns on a subworkflow of the pipeline that filters the fastq-files for complexity before the metagenomics profiling.\nUse the metagenomics_complexity_tool parameter to select a method.", + "description": "Run a complexity filter on the metagenomics input files before classification. Specify the tool to use with the `metagenomics_complexity_tool` parameter, save with `metagenomics_complexity_savefastq`." }, "metagenomics_complexity_savefastq": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Specify to save FASTQ files containing the complexity-filtered reads before metagenomic classification.", - "help_text": "Specify to save the complexity-filtered FASTQ files to the results directory."
+ "description": "Save FASTQ files containing the complexity filtered reads (before metagenomic classification).", + "help_text": "Save the complexity-filtered fastq-files to the results directory" }, "metagenomics_complexity_tool": { "type": "string", "default": "bbduk", - "description": "Specify which tool to use for trimming, filtering or reformatting of FASTQ reads that go into metagenomics screening.", + "description": "Specify which tool to use for trimming, filtering, or reformatting of fastq reads that go into metagenomics screening.", "enum": ["bbduk", "prinseq"], - "fa_icon": "fas fa-hammer", - "help_text": "Specify to select which tool is used to generate a final set of reads for the metagenomic classifier after any necessary trimming, filtering or reformatting of the reads.\n\nThis intermediate file is not saved in the results directory unless marked with `--metagenomics_complexity_savefastq`." + "fa_icon": "fas fa-hand-pointer", + "help_text": "You can select which tool is used to generate a final set of reads for the metagenomic classifier after any necessary trimming, filtering or reformatting of the reads.\n\nThis intermediate file is not saved in the results directory, unless marked with `--metagenomics_complexity_savefastq`." }, "metagenomics_complexity_entropy": { "type": "number", "fa_icon": "fas fa-sort-numeric-up", - "description": "Specify the entropy threshold under which a sequencing read will be complexity-filtered out.", + "description": "Specify the entropy threshold that under which a sequencing read will be complexity filtered out. This should be between 0-1.", "default": 0.3, - "help_text": "Specify the minimum 'entropy' value for complexity filtering for the BBDuk or PRINSEQ++ tools.\n\nThis value will only be used for PRINSEQ++ if `--metagenomics_prinseq_mode` is set to `entropy`.\n\nEntropy here corresponds to the amount of sequence variation existing within the read. 
Higher values correspond to more variety and thus will likely result in more specific matching to a taxon's reference genome. The trade-off here is fewer reads (or abundance information) available for having a confident identification.\n\n> Modifies parameters:\n> - BBDuk: `entropy=`\n> - PRINSEQ++: `-lc_entropy`" + "help_text": "Specify the minimum 'entropy' value for complexity filtering for the BBDuk or PRINSEQ++ tools.\n\nThis value will only be used for PRINSEQ++ if `--metagenomics_prinseq_mode` is set to `entropy`.\n\nEntropy here corresponds to the amount of sequence variation that exists within the read. Higher values correspond to more variety, and thus will likely result in more specific matching to a taxon's reference genome. The trade-off here is fewer reads (or abundance information) available for having a confident identification.\n\n> Modifies tool parameter(s):\n> - BBDuk: `entropy=`\n> - PRINSEQ++: `-lc_entropy`" }, "metagenomics_prinseq_mode": { "type": "string", "default": "entropy", "enum": ["entropy", "dust"], - "fa_icon": "fas fa-toggle-on", - "description": "Specify the complexity filter mode for PRINSEQ++.", - "help_text": "Specify the complexity filter mode for PRINSEQ++.\n\nUse the selected mode together with the correct flag:\n'dust' requires the `--metagenomics_prinseq_dustscore` parameter set\n'entropy' requires the `--metagenomics_complexity_entropy` parameter set\n\n> Modifies parameters:\n> - PRINSEQ++: `-lc_entropy`\n> - PRINSEQ++: `-lc_dust`" + "fa_icon": "fas fa-check-square", + "description": "Specify the complexity filter mode for PRINSEQ++", + "help_text": "Specify the complexity filter mode for PRINSEQ++ \n\nUse the selected mode together with the correct flag:\n'dust' requires the `--metagenomics_prinseq_dustscore` parameter set\n'entropy' requires the `--metagenomics_complexity_entropy` parameter set\n\n> Sets one of the tool parameter(s):\n> - PRINSEQ++: `-lc_entropy`\n> - PRINSEQ++: `-lc_dust`" },
"metagenomics_prinseq_dustscore": { "type": "number", "default": 0.5, - "fa_icon": "fas fa-filter", - "description": "Specify the minimum dust score for PRINTSEQ++ complexity filtering.", - "help_text": "Specify the minimum dust score below which low-complexity reads will be removed. A dust score is based on how often different tri-nucleotides occur along a read.\n\n> Modifies PRINSEQ++ parameter: `--lc_dust`" + "fa_icon": "fas fa-head-side-mask", + "description": "Specify the minimum dust score for PRINSEQ++ complexity filtering", + "help_text": "Specify the minimum dust score below which low-complexity reads will be removed. A DUST score is based on how often different tri-nucleotides occur along a read.\n\n> Modifies tool parameter(s):\n> - PRINSEQ++: `-lc_dust`" } }, "fa_icon": "fas fa-search" @@ -754,9 +747,9 @@ "type": "string", "default": "markduplicates", "description": "Specify which tool to use for deduplication.", - "help_text": "Specify which duplicate read removal tool to use. While `markduplicates` is set by default, an ancient DNA specific read deduplication tool `dedup` is offered (see [Peltzer et al. 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). The latter utilises both ends of paired-end data to remove duplicates (i.e. true exact duplicates, as markduplicates will over-zealously deduplicate anything with the same starting position even if the ends are different).\n\n> ⚠️ DeDup can only be used on collapsed (i.e. merged) reads from paired-end sequencing.", + "help_text": "Sets the duplicate read removal tool. Alternatively an ancient DNA specific read deduplication tool `dedup` (Peltzer et al. 2016) is offered. The latter utilises both ends of paired-end data to remove duplicates (i.e. true exact duplicates, as markduplicates will over-zealously deduplicate anything with the same starting position even if the ends are different).\n\n> \u26a0\ufe0f DeDup can only be used on collapsed (i.e.
merged) reads from paired-end sequencing.", "enum": ["markduplicates", "dedup"], - "fa_icon": "fas fa-hammer" + "fa_icon": "fas fa-layer-group" } }, "fa_icon": "fas fa-clone" @@ -770,127 +763,127 @@ "properties": { "run_mapdamage_rescaling": { "type": "boolean", - "fa_icon": "fas fa-power-off", - "description": "Specify to turn on damage rescaling of BAM files using mapDamage2 to probabilistically remove damage.", - "help_text": "Specify to turn on mapDamage2's BAM rescaling functionality. This probabilistically replaces Ts back to Cs depending on the likelihood this reference-mismatch was originally caused by damage. If the library is specified to be single-stranded, this will automatically use the `--single-stranded` mode.\nThis process will ameliorate the effects of aDNA damage, but also increase reference-bias.\n\n**This functionality does not have any MultiQC output.**\n ⚠️ Rescaled libraries will not be merged with non-scaled libraries of the same sample for downstream genotyping, as the model may be different for each library. If you wish to merge these, please do this manually and re-run nf-core/eager using the merged BAMs as input.\n\n> Modifies mapDamage2 parameter: `--rescale`" + "fa_icon": "fas fa-map", + "description": "Turn on damage rescaling of BAM files using mapDamage2 to probabilistically remove damage.", + "help_text": "Turns on mapDamage2's BAM rescaling functionality. This probabilistically replaces Ts back to Cs depending on the likelihood this reference-mismatch was originally caused by damage. If the library is specified to be single stranded, this will automatically use the --single-stranded mode.\nThis process will ameliorate the effects of aDNA damage, but also increase reference-bias. \n\n**This functionality does not have any MultiQC output.**\nwarning: rescaled libraries will not be merged with non-scaled libraries of the same sample for downstream genotyping, as the model may be different for each library. 
If you wish to merge these, please do this manually and re-run nf-core/eager using the merged BAMs as input.\n\n> Modifies the `--rescale` parameter of mapDamage2" }, "damage_manipulation_rescale_seqlength": { "type": "integer", "default": 12, - "description": "Specify the length of read sequence to use from each side for rescaling.", - "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale at both ends. This can be overridden by `--rescalelength*p`.\n\n> Modifies mapDamage2 parameter: `--seq-length`", + "description": "Length of read sequence to use from each side for rescaling. Can be overridden by `--rescalelength*p`.", + "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale at both ends.\n\n> Modifies the `--seq-length` parameter of mapDamage2.", "fa_icon": "fas fa-ruler-horizontal" }, "damage_manipulation_rescale_length_5p": { "type": "integer", "default": 0, - "description": "Specify the length of read for mapDamage2 to rescale from 5 prime end.", - "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. This overrides `--rescale_seqlength`.\n\n> Modifies mapDamage2 parameter: `--rescale-length-5p`", + "description": "Length of read for mapDamage2 to rescale from 5p end. Only used if not 0, otherwise `--rescale_seqlength` used.", + "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. Overrides `--rescale_seqlength`.\n\n> Modifies the `--rescale-length-5p` parameter of mapDamage2.", "fa_icon": "fas fa-balance-scale-right" }, "damage_manipulation_rescale_length_3p": { "type": "integer", "default": 0, - "description": "Specify the length of read for mapDamage2 to rescale from 3 prime end.", - "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. 
This overrides `--rescale_seqlength`.\n\n> Modifies mapDamage2 parameter `--rescale-length-3p`", + "description": "Length of read for mapDamage2 to rescale from 3p end. Only used if not 0 otherwise `--rescale_seqlength` used.", + "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. Overrides `--rescale_seqlength`.\n\n> Modifies the `--rescale-length-3p` parameter of mapDamage2.", "fa_icon": "fas fa-balance-scale-left" }, "run_pmd_filtering": { "type": "boolean", - "description": "Specify to turn on PMDtools filtering.", - "help_text": "Specify to run PMDtools for damage-based read filtering in sequencing libraries.", + "description": "Turn on PMDtools filtering.", + "help_text": "Specifies to run PMDtools for damage based read filtering in sequencing libraries.", "fa_icon": "fas fa-power-off" }, "damage_manipulation_pmdtools_threshold": { "type": "integer", "default": 3, "fa_icon": "far fa-chart-bar", - "description": "Specify PMD score threshold for PMDtools.", - "help_text": "Specify the PMDScore threshold to use when filtering BAM files for DNA damage. Only reads which surpass this damage score are considered for downstream analysis.\n\n> Modifies PMDtools parameter: `--threshold`" + "description": "Specify PMDScore threshold for PMDtools.", + "help_text": "Specifies the PMDScore threshold to use in the pipeline when filtering BAM files for DNA damage. 
Only reads which surpass this damage score are considered for downstream DNA analysis.\n\n> Modifies PMDtools parameter: `--threshold`" }, "damage_manipulation_pmdtools_masked_reference": { "type": "string", "fa_icon": "fas fa-mask", - "help_text": "Specify a FASTA file to use as reference for `samtools calmd` prior to PMD filtering.\nSetting the SNPs that are part of the used capture set as `N` can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a SNP to be counted as damage when it is a transition.", - "description": "Specify a masked FASTA file with positions to be used with PMDtools.", + "help_text": "Supplying a FASTA file will use this file as reference for `samtools calmd` prior to PMD filtering.\nSetting the SNPs that are part of the used capture set as `N` can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a SNP to be counted as damage when it is a transition.", + "description": "Specify a masked FASTA file with positions to be used with PMDtools.", "pattern": "^\\S+\\.fa?(\\sta)$", "format": "file-path" }, "damage_manipulation_pmdtools_reference_mask": { "type": "string", "fa_icon": "fas fa-mask", - "help_text": "Specify a BED file to activate masking of the reference FASTA at the contained sites prior to running PMDtools. Positions that are in the provided BED file will be replaced by Ns in the reference genome.\nThis can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a transition SNP to be counted as damage. Masking of the reference is done using `bedtools maskfasta`.", - "description": "Specify a BED file to be used to mask the reference FASTA prior to running PMDtools.", + "help_text": "Supplying a bedfile to this parameter activates masking of the reference fasta at the contained sites prior to running PMDtools. 
Positions that are in the provided bedfile will be replaced by Ns in the reference genome. \nThis can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a transition SNP to be counted as damage. Masking of the reference is done using `bedtools maskfasta`.", + "description": "Specify a bedfile to be used to mask the reference fasta prior to running pmdtools.", "pattern": "^\\S+\\.bed?(\\.gz)$", "format": "file-path" }, "run_trim_bam": { "type": "boolean", - "fa_icon": "fas fa-power-off", - "description": "Specify to turn on BAM trimming for non-UDG or half-UDG libraries.", - "help_text": "Specify to turn on the BAM trimming of [n] bases from reads in the deduplicated BAM file. Damage assessment in PMDtools or DamageProfiler remains untouched, as data is routed through this independently. BAM trimming is typically performed to reduce errors during genotyping that can be caused by aDNA damage.\n\nBAM trimming will only affect libraries with 'damage_treatment' of 'none' or 'half'. Complete UDG treatment ('full') should have removed all damage during library construction, so trimming of 0 bp is performed. The amount of bases that will be trimmed off from each side of the molecule should be set separately for libraries depending on their 'strandedness' and 'damage_treatment'.\n\n> Note: additional artefacts such as barcodes or adapters should be removed prior to mapping and not in this step." + "fa_icon": "fas fa-eraser", + "description": "Turn on BAM trimming. Will only affect non-UDG or half-UDG libraries.", + "help_text": "Turns on the BAM trimming method. Trims off [n] bases from reads in the deduplicated BAM file. Damage assessment in PMDtools or DamageProfiler remains untouched, as data is routed through this independently. 
BAM trimming is typically performed to reduce errors during genotyping that can be caused by aDNA damage.\n\nBAM trimming will only affect libraries with 'damage_treatment' of 'none' or 'half'. Complete UDG treatment ('full') should have removed all damage during library construction, so trimming of 0 bp is performed. The amount of bases that will be trimmed off from each side of the molecule should be set separately for libraries depending on their 'strandedness' and 'damage_treatment'.\n\n> Note: additional artefacts such as barcodes or adapters should be removed prior to mapping and not in this step." }, "damage_manipulation_bamutils_trim_double_stranded_none_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of reads for double-stranded non-UDG libraries.", - "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of reads for double-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`" + "fa_icon": "fas fa-ruler-combined", + "description": "Specify the number of bases to clip off reads from 'left' end of read for double-stranded non-UDG libraries.", + "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'none'. 
Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`" }, "damage_manipulation_bamutils_trim_double_stranded_none_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of reads for double-stranded non-UDG libraries.", - "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of reads for double-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-ruler", + "description": "Specify the number of bases to clip off reads from 'right' end of read for double-stranded non-UDG libraries.", + "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_trim_double_stranded_half_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for double-stranded half-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'half'. 
Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", - "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for double-stranded half-UDG libraries." + "fa_icon": "fas fa-ruler-combined", + "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", + "description": "Specify the number of bases to clip off reads from 'left' end of read for double-stranded half-UDG libraries." }, "damage_manipulation_bamutils_trim_double_stranded_half_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for double-stranded half-UDG libraries.", - "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for double-stranded half-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-ruler", + "description": "Specify the number of bases to clip off reads from 'right' end of read for double-stranded half-UDG libraries.", + "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to 'half'. 
Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_trim_single_stranded_none_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", - "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded non-UDG libraries." + "fa_icon": "fas fa-ruler-combined", + "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", + "description": "Specify the number of bases to clip off reads from 'left' end of read for single-stranded non-UDG libraries." }, "damage_manipulation_bamutils_trim_single_stranded_none_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded non-UDG libraries.", - "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded non-UDG libraries. 
By default, this is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-ruler", + "description": "Specify the number of bases to clip off reads from 'right' end of read for single-stranded non-UDG libraries.", + "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_trim_single_stranded_half_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded half-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", - "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded half-UDG libraries." + "fa_icon": "fas fa-ruler-combined", + "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'half'. 
Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", + "description": "Specify the number of bases to clip off reads from 'left' end of read for single-stranded half-UDG libraries." }, "damage_manipulation_bamutils_trim_single_stranded_half_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-cut", - "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded half-UDG libraries.", - "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded half-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-ruler", + "description": "Specify the number of bases to clip off reads from 'right' end of read for single-stranded half-UDG libraries.", + "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_softclip": { "type": "boolean", - "fa_icon": "fas fa-mask", - "description": "Specify to turn on soft-trimming instead of hard masking.", - "help_text": "Specify to turn on soft-trimming instead of hard masking of bases. By default, nf-core/eager uses hard trimming, which sets trimmed bases to 'N' with quality '!' in the BAM output. 
Turn this on to use soft-trimming instead, which masks reads at the read ends using the CIGAR string instead.\n\n> Modifies bamUtil's trimBam parameter: `-c`" + "fa_icon": "fas fa-paint-roller", + "description": "Turn on using soft-trimming instead of hard masking.", + "help_text": "By default, nf-core/eager uses hard trimming, which sets trimmed bases to 'N' with quality '!' in the BAM output. Turn this on to use soft-trimming instead, which masks reads at the read ends using the CIGAR string instead.\n\n> Modifies bam trimBam parameter: `-c`" } } }, @@ -903,28 +896,28 @@ "run_genotyping": { "type": "boolean", "fa_icon": "fas fa-power-off", - "description": "Specify to turn on genotyping of BAM files.", - "help_text": "Specify to turn on genotyping. `--genotyping_source` and `--genotyping_tool` must also be provided together with this option." + "description": "Turn on genotyping of BAM files.", + "help_text": "Turns on genotyping. `--genotyping_source` and `--genotyping_tool` must also be provided together with this option." }, "genotyping_source": { "type": "string", "description": "Specify which input BAM to use for genotyping.", - "help_text": "Specify which BAM file to use for genotyping, depending on what BAM processing modules you have turned on. Options are: 'raw' (to use the reads used as input for damage manipulation); 'pmd' (for pmdtools output); 'trimmed' (for base-clipped BAMs. Base-clipped-PMD-filtered BAMs if both filtering and trimming are requested); 'rescaled' (for mapDamage2 rescaling output).\nWarning: Depending on the parameters you provided, 'raw' can refer to all mapped reads, filtered reads (if BAM filtering has been performed), or the deduplicated reads (if deduplication was performed).", + "help_text": "Indicates which BAM file to use for genotyping, depending on what BAM processing modules you have turned on. 
Options are: 'raw' (to use the reads used as input for damage manipulation); 'pmd' (for pmdtools output); 'trimmed' (for base-clipped BAMs. Base-clipped-PMD-filtered BAMs if both filtering and trimming are requested); 'rescaled' (for mapDamage2 rescaling output).\nWarning: Depending on the parameters you provided, 'raw' can refer to all mapped reads, filtered reads (if bam filtering has been performed), or the deduplicated reads (if deduplication was performed).", "fa_icon": "fas fa-faucet", "enum": ["raw", "pmd", "trimmed", "rescaled"] }, "genotyping_tool": { "type": "string", - "fa_icon": "fas fa-hammer", + "fa_icon": "fas fa-tools", "enum": ["ug", "hc", "freebayes", "pileupcaller", "angsd"], - "help_text": "Specify which genotyper to use. Current options are: pileupCaller, ANGSD, GATK UnifiedGenotyper (v3.5), GATK HaplotypeCaller (v4) or FreeBayes.\n\n> Note that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does de novo assembly around each variant site), be aware GATK v3.5 it is officially deprecated by the Broad Institute (but is used here for compatibility with MultiVCFAnalyzer).", - "description": "Specify which genotyper to use." + "help_text": "Specifies which genotyper to use. Current options are: GATK (v3.5) UnifiedGenotyper or GATK Haplotype Caller (v4); and the FreeBayes Caller.\n\n> Note that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does de novo assembly around each variant site), be aware GATK 3.5 it is officially deprecated by the Broad Institute (but is used here for compatibility with MultiVCFAnalyzer).", + "description": "Specify which genotyper to use between: GATK UnifiedGenotyper, GATK HaplotypeCaller, Freebayes, or pileupCaller." 
}, "skip_bcftools_stats": { "type": "boolean", - "fa_icon": "fas fa-forward", - "description": "Specify to skip generation of VCF-based variant calling statistics with bcftools.", - "help_text": "Specify to disable running of `bcftools stats` against VCF files from GATK and FreeBayes genotypers.\n\nThis will automatically include the FASTA reference for INDEL-related statistics." + "fa_icon": "far fa-chart-bar", + "description": "Skip bcftools stats generation for VCF based variant calling statistics", + "help_text": "Disables running of `bcftools stats` against VCF files from GATK and FreeBayes genotypers.\n\nIf ran, `bcftools stats` will automatically include the FASTA reference for INDEL-related statistics." }, "genotyping_reference_ploidy": { "type": "integer", @@ -936,27 +929,27 @@ "genotyping_pileupcaller_min_base_quality": { "type": "integer", "default": 30, - "description": "Specify the base mapping quality to be used for genotyping with pileupCaller.", - "help_text": "Specify the minimum base quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller.\n\n> Modifies samtools mpileup parameter: `-Q`.", + "description": "The base mapping quality to be used for genotyping with pileupcaller.", + "help_text": "The minimum base quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller. \n\n> Modifies samtools mpileup parameter: `-Q`.", "fa_icon": "fas fa-filter" }, "genotyping_pileupcaller_min_map_quality": { "type": "integer", "default": 30, "fa_icon": "fas fa-filter", - "description": "Specify the minimum mapping quality to be used for genotyping with pileupCaller.", - "help_text": "Specify the minimum mapping quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller.\n\n> Modifies samtools mpileup parameter: `-q`." 
+ "description": "The minimum mapping quality to be used for genotyping with pileupcaller.", + "help_text": "The minimum mapping quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller. \n\n> Modifies samtools mpileup parameter: `-q`." }, "genotyping_pileupcaller_bedfile": { "type": "string", "fa_icon": "fas fa-bed", - "help_text": "Specify a SNP panel in the form of a BED file of sites at which to generate a pileup for pileupCaller.", + "help_text": "Specify a SNP panel in the form of a bed file of sites at which to generate a pileup for pileupCaller.", "format": "file-path", - "description": "Specify the path to SNP panel in BED format for pileupCaller." + "description": "Specify the path to SNP panel in bed format for pileupCaller." }, "genotyping_pileupcaller_snpfile": { "type": "string", - "help_text": "Specify a SNP panel in [EIGENSTRAT](https://github.com/DReichLab/EIG/blob/master/CONVERTF/README) format of sites to be called with pileupCaller.", + "help_text": "Specify a SNP panel in [EIGENSTRAT](https://github.com/DReichLab/EIG/blob/master/CONVERTF/README) format, pileupCaller will call these sites.", "fa_icon": "fas fa-sliders-h", "format": "file-path", "description": "Specify the path to SNP panel in EIGENSTRAT format for pileupCaller." @@ -965,15 +958,15 @@ "type": "string", "default": "randomHaploid", "fa_icon": "fas fa-toolbox", - "description": "Specify the SNP calling method to use for genotyping with pileupCaller.", - "help_text": "Specify the SNP calling method to use for genotyping. 'randomHaploid' will randomly sample a read overlapping the SNP and produce a homozygous genotype with the allele supported by that read (often called 'pseudohaploid' or 'pseudodiploid'). 'randomDiploid` will randomly sample two reads overlapping the SNP and produce a genotype comprised of the two alleles supported by the two reads. 
'majorityCall' will produce a genotype that is homozygous for the allele that appears in the majority of reads overlapping the SNP.\n\n> Modifies pileupCaller parameters: `--randomHaploid` `--randomDiploid` `--majorityCall`", + "description": "Specify the SNP calling method to use for genotyping.", + "help_text": "Specify the SNP calling method to use for genotyping. 'randomHaploid' will randomly sample a read overlapping the SNP, and produce a homozygous genotype with the allele supported by that read (often called 'pseudohaploid' or 'pseudodiploid'). 'randomDiploid` will randomly sample two reads overlapping the SNP and produce a genotype comprised of the two alleles supported by the two reads. 'majorityCall' will produce a genotype that is homozygous for the allele that appears in the majority of reads overlapping the SNP.\n\n> Modifies pileupCaller parameters: `--randomHaploid` `--randomDiploid` `--majorityCall`", "enum": ["randomHaploid", "randomDiploid", "majorityCall"] }, "genotyping_pileupcaller_transitions_mode": { "type": "string", "default": "AllSites", - "description": "Specify the calling mode for transitions with pileupCaller.", - "help_text": "Specify if genotypes of transition SNPs should be called, set to missing, or excluded from the genotypes respectively.\n\n> Modifies pileupCaller parameter: `--skipTransitions` `--transitionsMissing`", + "description": "Specify the calling mode for transitions.", + "help_text": "Specify if genotypes of transition SNPs should be called, set to missing, or excluded from the genotypes respectively. 
\n\n> Modifies pileupCaller parameter: `--skipTransitions` `--transitionsMissing`", "enum": ["AllSites", "TransitionsMissing", "SkipTransitions"], "fa_icon": "fas fa-toggle-on" }, @@ -982,13 +975,13 @@ "default": 30, "fa_icon": "fas fa-balance-scale-right", "description": "Specify GATK phred-scaled confidence threshold.", - "help_text": "Specify a GATK genotyper phred-scaled confidence threshold of a given SNP/INDEL call.\n\n> Modifies GATK UnifiedGenotyper or HaplotypeCaller parameter: `-stand_call_conf`" + "help_text": "If selected, specify a GATK genotyper phred-scaled confidence threshold of a given SNP/INDEL call.\n\n> Modifies GATK UnifiedGenotyper or HaplotypeCaller parameter: `-stand_call_conf`" }, "genotyping_gatk_dbsnp": { "type": "string", - "help_text": "Specify VCF file for output VCF SNP annotation, e.g. if you want to annotate your VCF file with 'rs' SNP IDs. Check GATK documentation for more information. Gzip not accepted.", + "help_text": "(Optional) Specify VCF file for output VCF SNP annotation e.g. if you want to annotate your VCF file with 'rs' SNP IDs. Check GATK documentation for more information. Gzip not accepted.", "fa_icon": "fas fa-pen-alt", - "description": "Specify VCF file for SNP annotation of output VCF files for GATK.", + "description": "Specify VCF file for SNP annotation of output VCF files. Optional. Gzip not accepted.", "pattern": "^\\S+\\.vcf$", "format": "file-path", "mimetype": "VCF" @@ -997,16 +990,16 @@ "type": "integer", "default": 250, "fa_icon": "fas fa-icicles", - "description": "Specify the maximum depth coverage allowed for genotyping with GATK before down-sampling is turned on.", - "help_text": "Specify the maximum depth coverage allowed for genotyping before down-sampling is turned on. 
Any position with a coverage higher than this value will be randomly down-sampled to this many reads.\n\n> Modifies GATK UnifiedGenotyper parameter: `-dcov`" + "description": "Maximum depth coverage allowed for genotyping before down-sampling is turned on.", + "help_text": "Maximum depth coverage allowed for genotyping before down-sampling is turned on. Any position with a coverage higher than this value will be randomly down-sampled to this many reads.\n\n> Modifies GATK UnifiedGenotyper parameter: `-dcov`" }, "genotyping_gatk_ug_out_mode": { "type": "string", "default": "EMIT_VARIANTS_ONLY", - "description": "Specify GATK UnifiedGenotyper output mode.", + "description": "Specify GATK output mode.", "enum": ["EMIT_VARIANTS_ONLY", "EMIT_ALL_CONFIDENT_SITES", "EMIT_ALL_SITES"], - "help_text": "Specify GATK UnifiedGenotyper output mode to use when producing the output VCF (i.e. produce calls for every site or just confidence sites.)\n\n> Modifies GATK UnifiedGenotyper parameter: `--output_mode`", - "fa_icon": "fas fa-toggle-on" + "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, this defines the output mode to use when producing the output VCF (i.e. produce calls for every site or just confidence sites.)\n\n> Modifies GATK UnifiedGenotyper parameter: `--output_mode`", + "fa_icon": "fas fa-bullhorn" }, "genotyping_gatk_ug_genotype_mode": { "type": "string", @@ -1014,49 +1007,49 @@ "description": "Specify UnifiedGenotyper likelihood model.", "enum": ["SNP", "INDEL", "BOTH", "GENERALPLOIDYSNP", "GENERALPLOIDYINDEL"], "fa_icon": "fas fa-project-diagram", - "help_text": "Specify GATK UnifiedGenotyper likelihood model, i.e. whether to call only SNPs or INDELS etc.\n\n> Modifies GATK UnifiedGenotyper parameter: `--genotype_likelihoods_model`" + "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, this sets which likelihood model to follow, i.e. 
whether to call only SNPs or INDELS etc.\n\n> Modifies GATK UnifiedGenotyper parameter: `--genotype_likelihoods_model`" }, "genotyping_gatk_ug_keeprealignbam": { "type": "boolean", - "fa_icon": "fas fa-save", + "fa_icon": "far fa-save", "description": "Specify to keep the BAM output of re-alignment around variants from GATK UnifiedGenotyper.", - "help_text": "Specify to output the BAMs that have realigned reads (with GATK (v3) IndelRealigner) around possible variants for improved genotyping with GATK UnifiedGenotyper in addition to the standard VCF output.\n\nThese BAMs will be stored in the same folder as the corresponding VCF files." + "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, providing this parameter will output the BAMs that have realigned reads (with GATK's (v3) IndelRealigner) around possible variants for improved genotyping in addition to the standard VCF output.\n\nThese BAMs will be stored in the same folder as the corresponding VCF files." }, "genotyping_gatk_ug_defaultbasequalities": { "type": "integer", "default": -1, - "description": "Specify to supply a default base quality if a read is missing a base quality score.", - "help_text": "Specify a value to set base quality scores for genotyping with GATK UnifiedGenotyper, if reads are missing this information. Might be useful if you have 'synthetically' generated reads (e.g. chopping up a reference genome). Default is set to `-1` which is to not set any default quality (turned off).\n\n> Modifies GATK UnifiedGenotyper parameter: `--defaultBaseQualities`", + "description": "Supply a default base quality if a read is missing a base quality score. Setting to -1 turns this off.", + "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, specify a value to set base quality scores, if reads are missing this information. Might be useful if you have 'synthetically' generated reads (e.g. chopping up a reference genome). 
Default is set to `-1` which is to not set any default quality (turned off). \n\n> Modifies GATK UnifiedGenotyper parameter: `--defaultBaseQualities`", "fa_icon": "fas fa-redo-alt" }, "genotyping_gatk_hc_out_mode": { "type": "string", "default": "EMIT_VARIANTS_ONLY", - "fa_icon": "fas fa-toggle-on", - "description": "Specify GATK HaplotypeCaller output mode.", - "help_text": "Specify the type of sites that should be included in the output VCF after genotyping with GATK HaplotypeCaller (i.e. produce calls for every site or just confidence sites).\n\n> Modifies GATK HaplotypeCaller parameter: `--output_mode`", + "fa_icon": "fas fa-bullhorn", + "description": "Specify GATK output mode.", + "help_text": "If GATK HaplotypeCaller is selected as the genotyping tool, this sets the type of sites that should be included in the output VCF (i.e. produce calls for every site or just confidence sites). \n\n> Modifies GATK HaplotypeCaller parameter: `--output_mode`", "enum": ["EMIT_VARIANTS_ONLY", "EMIT_ALL_CONFIDENT_SITES", "EMIT_ALL_ACTIVE_SITES"] }, "genotyping_gatk_hc_emitrefconf": { "type": "string", "default": "GVCF", - "fa_icon": "fas fa-toggle-on", + "fa_icon": "fas fa-bullhorn", "description": "Specify HaplotypeCaller mode for emitting reference confidence calls.", - "help_text": "Specify GATK HaplotypeCaller mode for emitting reference confidence calls.\n\n> Modifies GATK HaplotypeCaller parameter: `--emit-ref-confidence`", + "help_text": "If GATK HaplotypeCaller is selected as the genotyping tool, this sets the mode for emitting reference confidence calls.\n\n> Modifies GATK HaplotypeCaller parameter: `--emit-ref-confidence`", "enum": ["NONE", "BP_RESOLUTION", "GVCF"] }, "genotyping_freebayes_min_alternate_count": { "type": "integer", "default": 1, - "description": "Specify minimum required supporting observations of an alternate allele to consider a variant in FreeBayes.", - "help_text": "Specify the minimum count of observations supporting an alternate allele within 
a single individual in order to evaluate the position during genotyping with FreeBayes.\n\n> Modifies FreeBayes parameter: `-C`", - "fa_icon": "fas fa-filter" + "description": "Specify minimum required supporting observations of an alternate allele to consider a variant.", + "help_text": "Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position.\n\n> Modifies freebayes parameter: `-C`", + "fa_icon": "fas fa-align-center" }, "genotyping_freebayes_skip_coverage": { "type": "integer", "default": 0, - "description": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than specified in FreeBayes.", - "help_text": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than the specified value during genotyping with FreeBayes. This is set to 0 by default, which deactivates this behaviour.\n\n> Modifies FreeBayes parameter: `-g`", + "description": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than specified.", + "help_text": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than the specified value. 
Setting to 0 (the default) deactivates this behaviour.\n\n> Modifies freebayes parameter: `-g`", "fa_icon": "fab fa-think-peaks" }, "genotyping_angsd_glmodel": { @@ -1064,7 +1057,7 @@ "default": "samtools", "fa_icon": "fas fa-project-diagram", "description": "Specify which ANGSD genotyping likelihood model to use.", - "help_text": "Specify which genotype likelihood model to use in ANGSD.\n\n> Modifies ANGSD parameter: `-GL`", + "help_text": "Specify which genotype likelihood model to use.\n\n> Modifies angsd parameter: `-GL`", "enum": ["samtools", "gatk", "soapsnp", "syk"] }, "genotyping_angsd_glformat": { @@ -1072,7 +1065,7 @@ "default": "binary", "fa_icon": "fas fa-text-height", "description": "Specify the formatting of the output VCF for ANGSD genotype likelihood results.", - "help_text": "Specifies what type of genotyping likelihood file format will be output by ANGSD.\n\nThe options refer to the following descriptions respectively:\n\n- `binary`: binary output of all 10 log genotype likelihood\n- `beagle_binary`: beagle likelihood file\n- `binary_three`: binary 3 times likelihood\n- `text`: text output of all 10 log genotype likelihoods.\n\nSee the [ANGSD documentation](http://www.popgen.dk/angsd/) for more information on which to select for your downstream applications.\n\n> Modifies ANGSD parameter: `-doGlf`", + "help_text": "Specifies what type of genotyping likelihood file format will be output.\n\nThe options refer to the following descriptions respectively:\n\n- `binary`: binary output of all 10 log genotype likelihood\n- `beagle_binary`: beagle likelihood file\n- `binary_three`: binary 3 times likelihood\n- `text`: text output of all 10 log genotype likelihoods.\n\nSee the [ANGSD documentation](http://www.popgen.dk/angsd/) for more information on which to select for your downstream applications.\n\n> Modifies angsd parameter: `-doGlf`", "enum": ["binary", "beagle_binary", "binary_three", "text"] } }, @@ -1089,15 +1082,15 @@ "properties": { 
"run_mtnucratio": { "type": "boolean", - "description": "Specify to turn on mitochondrial to nuclear ratio calculation.", - "help_text": "Specify to turn on estimation of the ratio of mitochondrial to nuclear reads.", - "fa_icon": "fas fa-power-off" + "description": "Turn on mitochondrial to nuclear ratio calculation.", + "help_text": "Turn on the module to estimate the ratio of mitochondrial to nuclear reads.", + "fa_icon": "fas fa-balance-scale-left" }, "mitochondrion_header": { "type": "string", "default": "MT", - "description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome.", - "help_text": "Specify the FASTA entry in the reference file specified as `--fasta`, which acts as the mitochondrial 'chromosome' to base the ratio calculation on. The tool only accepts the first section of the header before the first space. The default chromosome name is based on hs37d5/GrCH37 human reference genome.", + "description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome (up to the first space).", + "help_text": "Specify the FASTA entry in the reference file specified as --fasta, which acts as the mitochondrial 'chromosome' to base the ratio calculation on. The tool only accepts the first section of the header before the first space. 
The default chromosome name is based on the hs37d5/GRCh37 human reference genome.", "fa_icon": "fas fa-heading" } } @@ -1105,19 +1098,19 @@ "mapping_statistics": { "title": "Mapping statistics", "type": "object", - "description": "Options for the calculation of mapping statistics", + "description": "", "default": "", "properties": { "mapstats_skip_preseq": { "type": "boolean", - "help_text": "Specify to turn off the computation of library complexity estimation.", - "description": "Specify to turn off the computation of library complexity estimation with preseq.", + "help_text": "Turns off the computation of library complexity estimation.", + "description": "Turns off the computation of library complexity estimation.", "fa_icon": "fas fa-forward" }, "mapstats_preseq_mode": { "type": "string", "default": "c_curve", - "help_text": "Specify which mode of preseq to run.\n\nFrom the [preseq documentation](http://smithlabresearch.org/wp-content/uploads/manual.pdf):\n\nc curve is used to compute the expected complexity curve of a mapped read file with a hypergeometric formula\n\nlc extrap is used to generate the expected yield for theoretical larger experiments and bounds on the number of distinct reads in the library and the associated confidence intervals, which is computed by bootstrapping the observed duplicate counts histogram.", + "help_text": "Specify which mode of preseq to run.\n\nFrom the [PreSeq documentation](http://smithlabresearch.org/wp-content/uploads/manual.pdf):\n\nc curve is used to compute the expected complexity curve of a mapped read file with a hypergeometric formula\n\nlc extrap is used to generate the expected yield for theoretical larger experiments and bounds on the number of distinct reads in the library and the associated confidence intervals, which is computed by bootstrapping the observed duplicate counts histogram.", "description": "Specify which mode of preseq to run.", "fa_icon": "fas fa-toggle-on", "enum": ["c_curve", "lc_extrap"] @@ -1125,52
+1118,57 @@ "mapstats_preseq_stepsize": { "type": "integer", "default": 1000, - "description": "Specify the step size (i.e., sampling regularity) of preseq.", - "help_text": "Specify the step size of preseq's c_curve and lc_extrap methods. This can be useful when few reads are present and allow preseq to be used for extrapolation of shallow sequencing results.\n\n\n> Modifies preseq parameter:\n> `-s`", + "description": "Specify the step size (i.e., sampling regularity) of Preseq.", + "help_text": "Can be used to configure the step size of Preseq's c_curve and lc_extrap methods. This can be useful when few reads are present, allowing Preseq to be used for extrapolation of shallow sequencing results.\n\n\n> Modifies tool parameter(s)\n> - preseq: `-s`", "fa_icon": "fas fa-shoe-prints" }, "mapstats_preseq_terms": { "type": "integer", "default": 100, "fa_icon": "fas fa-sort-numeric-up-alt", - "help_text": "Specify the maximum number of terms that preseq's lc_extrap mode will use.\n\n> Modifies preseq lc_extrap parameter: `-x`", - "description": "Specify the maximum number of terms that preseq's lc_extrap mode will use." + "help_text": "Specify the maximum number of terms that lc_extrap mode will use.\n\n> Modifies preseq lc_extrap parameter: `-x`", + "description": "Specify the maximum number of terms that lc_extrap mode will use." }, "mapstats_preseq_maxextrap": { "type": "integer", "default": 10000000000, "fa_icon": "fas fa-ban", - "help_text": "Specify the maximum extrapolation that preseq's lc_extrap mode will perform.\n\n> Modifies preseq lc_extrap parameter: `-e`", - "description": "Specify the maximum extrapolation to use for preseq's lc_extrap mode."
+ "help_text": "Specify the maximum extrapolation that lc_extrap mode will perform.\n\n> Modifies preseq lc_extrap parameter: `-e`", + "description": "Specify the maximum extrapolation (lc_extrap mode only)" }, "mapstats_preseq_bootstrap": { "type": "integer", "default": 100, "fa_icon": "fab fa-bootstrap", - "help_text": "Specify the number of bootstraps preseq's lc_extrap mode will perform to calculate confidence intervals.\n\n> Modifies preseq lc_extrap parameter: `-n`", - "description": "Specify number of bootstraps to perform in preseq's lc_extrap mode." + "help_text": "Specify the number of bootstraps lc_extrap mode will perform to calculate confidence intervals.\n\n> Modifies preseq lc_extrap parameter: `-n`", + "description": "Specify number of bootstraps to perform (lc_extrap mode only)" }, "mapstats_preseq_cval": { "type": "number", "default": 0.95, "fa_icon": "fas fa-check-circle", - "help_text": "Specify the allowed level of confidence intervals used for prerseq's lc_extrap mode.\n\n> Modifies preseq lc_extrap parameter: `-c`", - "description": "Specify confidence interval level for preseq's lc_extrap mode." 
+ "help_text": "Specify the allowed level of confidence intervals used for lc_extrap mode.\n\n> Modifies preseq lc_extrap parameter: `-c`", + "description": "Specify confidence interval level (lc_extrap mode only)" }, "mapstats_preseq_defects_mode": { "type": "boolean", - "description": "Specify to turn on preseq defects mode to extrapolate without testing for defects in lc_extrap mode.", - "help_text": "Specify to activate defects mode of `preseq lc_extrap`, which runs the extrapolation without testing for defects.\n\n> Modifies preseq lc_extrap parameter: `-D`", - "fa_icon": "fas fa-power-off" + "description": "Turns on defects mode to extrapolate without testing for defects (lc_extrap mode only).", + "help_text": "Activates defects mode of `lc_extrap`, which does the extrapolation without testing for defects.\n\n> Modifies preseq lc_extrap parameter: `-D`", + "fa_icon": "fab fa-creative-commons-sampling-plus" }, "skip_qualimap": { - "type": "boolean", - "description": "Specify to turn off coverage calculation with Qualimap.", - "fa_icon": "fas fa-forward" + "type": "boolean" }, "snpcapture_bed": { "type": "string", - "description": "Specify path to SNP capture positions in BED format for coverage calculations with Qualimap." + "description": "Path to snp capture in BED format. Provided file can also be gzipped." + }, + "elongation_factor": { + "type": "integer", + "default": 500, + "description": "Specify the number of bases to extend reference by (circularmapper only)", + "help_text": "The number of bases to extend the reference genome with. 
This is set to 500 by default.", + "fa_icon": "fas fa-external-link-alt" + } }, "fa_icon": "fas fa-search" @@ -1181,92 +1179,91 @@ "description": "Options for calculating and filtering for characteristic ancient DNA damage patterns.", "default": "", "fa_icon": "fas fa-chart-line", - "help_text": "More documentation can be found at the follow links for:\n\n[DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler)\n\nIf using TSV input, DamageProfiler is performed per library, i.e. after lane merging. BAM Trimming is only performed on non-UDG and half-UDG treated data.", + "help_text": "More documentation can be found at the following links for:\n\n[DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler)\n\nIf using TSV input, DamageProfiler is performed per library, i.e. after lane merging. BAM Trimming is only performed on non-UDG and half-UDG treated data.", "properties": { "skip_damagecalculation": { "type": "boolean", "fa_icon": "fas fa-forward", - "help_text": "Specify to turn off computation of DNA damage profiles.", - "description": "Specify to turn off ancient DNA damage calculation." + "help_text": "Turns off the computation of DNA damage profiles." }, "damagecalculation_tool": { "type": "string", "default": "damageprofiler", "enum": ["damageprofiler", "mapdamage"], - "fa_icon": "fas fa-hammer", + "fa_icon": "fas fa-tools", "description": "Specify the tool to use for damage calculation.", "help_text": "Specify the tool to be used for damage calculation. DamageProfiler is generally faster than mapDamage2, but the latter has an option to limit the number of reads used. This can significantly speed up the processing of very large files, where the damage estimates are already accurate after processing only a fraction of the input."
}, "damagecalculation_yaxis": { "type": "number", "default": 0.3, - "description": "Specify the maximum misincorporation frequency that should be displayed on damage plot.", - "help_text": "Specify the maximum misincorporation frequency that should be displayed in the damage plot.\n\n> Modifies DamageProfiler parameter: `-yaxis_dp_max` or mapDamage2 parameter: `--ymax`", + "description": "Specify the maximum misincorporation frequency that should be displayed on damage plot. Set to 0 to 'autoscale'.", + "help_text": "Specifies the maximum misincorporation frequency that should be displayed in the damage plot.\n\n> Modifies DamageProfiler parameter: -yaxis_dp_max or mapDamage2 parameter: --ymax", "fa_icon": "fas fa-ruler-combined" }, "damagecalculation_xaxis": { "type": "integer", "default": 25, "description": "Specify number of bases of each read to be considered for plotting damage estimation.", - "help_text": "Specify the number of bases to be considered for plotting nucleotide misincorporations.\n\n> Modifies DamageProfiler parameter: `-t` or mapDamage2 parameter: `-m`", + "help_text": "Specifies the number of bases to be considered for plotting nucleotide misincorporations.\n\n> Modifies DamageProfiler parameter: -t or mapDamage2 parameter: -m\n", "fa_icon": "far fa-chart-bar" }, "damagecalculation_damageprofiler_length": { "type": "integer", "default": 100, - "description": "Specify the length filter for DamageProfiler.", - "help_text": "Specify the number of bases which are considered for frequency computations.\n\n> Modifies DamageProfiler parameter: `-l`", - "fa_icon": "fas fa-ruler-horizontal" + "description": "Specifies the length filter for DamageProfiler.", + "help_text": "Number of bases which are considered for frequency computations, by default set to 100.\n\n> Modifies DamageProfiler parameter: -l", + "fa_icon": "fas fa-sort-amount-down" }, "damagecalculation_mapdamage_downsample": { "type": "integer", "default": 0, "fa_icon": "fas
fa-compress-alt", - "description": "Specify the maximum number of reads to consider for damage calculation with mapDamage.", - "help_text": "Specify the maximum number of reads used for damage calculation in mapDamage2. This can be used to significantly reduce the amount of time required for damage assessment. Note that a too low value can also obtain incorrect results.\n\n> Modifies mapDamage2 parameter: `-n`" + "description": "Specify the maximum number of reads to consider for damage calculation. Default value is 0 (i.e. no downsampling is performed).", + "help_text": "The maximum number of reads used for damage calculation in mapDamage2. Can be used to significantly reduce the amount of time required for damage assessment. Note that a value that is too low can also produce incorrect results.\n\n> Modifies mapDamage2 parameter: -n\n" } } }, "feature_annotation_statistics": { "title": "Feature Annotation Statistics", "type": "object", - "description": "Options for calculating reference annotation statistics (e.g. gene coverages)", + "description": "Options for getting reference annotation statistics (e.g. gene coverages)", "default": "", "properties": { "run_bedtools_coverage": { "type": "boolean", - "description": "Specify to turn on calculation of number of reads, depth and breadth coverage of features in reference with bedtools.", - "fa_icon": "fas fa-power-off", - "help_text": "Specify to turn on the bedtools module, producing statistics for breadth (or percent coverage), and depth (or X fold) coverages.\n\n> Modifies bedtools coverage parameter: `-mean`" + "description": "Turn on ability to calculate no.
reads, depth and breadth coverage of features in reference.", + "fa_icon": "fas fa-chart-area", + "help_text": "Specifies to turn on the bedtools module, producing statistics for breadth (or percent coverage), and depth (or X fold) coverages.\n\n> Modifies tool parameter(s):\n- bedtools coverage: `-mean`" }, "mapstats_bedtools_featurefile": { "type": "string", - "description": "Specify path to GFF or BED file containing positions of features in reference file for bedtools.", + "description": "Path to GFF or BED file containing positions of features in reference file (--fasta). Path should be enclosed in quotes.", "fa_icon": "fas fa-file-signature", "help_text": "Specify the path to a GFF/BED containing the feature coordinates (or any acceptable input for [`bedtools coverage`](https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html)). Must be in quotes.\n" } }, "fa_icon": "fas fa-scroll", - "help_text": "If you're interested in looking at coverage stats for certain features on your reference such as genes, SNPs etc., you can use the following bedtools module for this purpose.\n\nMore documentation on bedtools can be seen in the [bedtools\ndocumentation](https://bedtools.readthedocs.io/en/latest/)\n\nIf using TSV input, bedtools is run after library merging of same-named library BAMs that have the same type of UDG treatment." 
+ "help_text": "If you're interested in looking at coverage stats for certain features on your\nreference such as genes, SNPs etc., you can use the following bedtools module\nfor this purpose.\n\nMore documentation on bedtools can be seen in the [bedtools\ndocumentation](https://bedtools.readthedocs.io/en/latest/)\n\nIf using TSV input, bedtools is run after library merging of same-named library\nBAMs that have the same type of UDG treatment.\n" }, "host_removal": { "title": "Host Removal", "type": "object", - "description": "Options for removing host-mapped reads", + "description": "", "default": "", "properties": { "run_host_removal": { "type": "boolean", - "description": "Specify to turn on creation of pre-adapter-removal and/or read-pair-merging FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data).", - "help_text": "Specify to recreate pre-adapter-removal and/or read-pair-merging FASTQ files but without reads that mapped to reference (e.g. for public upload of privacy-sensitive non-host data)", + "description": "Turn on per-lane creation of pre-adapter-removal and/or read-pair-merging FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data)", + "help_text": "Recreates pre-adapter-removal and/or read-pair-merging FASTQ files but without reads that mapped to reference (e.g. for public upload of privacy-sensitive non-host data)", "fa_icon": "fas fa-power-off" }, "host_removal_mode": { "type": "string", "default": "remove", - "description": "Specify the host-mapped read removal mode.", - "help_text": "Specify the host-mapped read removal mode.\n\n> Modifies extract_map_reads.py parameter: -m", - "fa_icon": "fas fa-toggle-on", + "description": "Host-mapped read removal mode. 
Remove mapped reads completely from FASTQ (remove) or just mask the host sequence of mapped reads with N (replace).", + "help_text": "Modifies extract_map_reads.py parameter: -m", + "fa_icon": "fas fa-plane-slash", "enum": ["remove", "replace"] } }, @@ -1275,56 +1272,56 @@ "contamination_estimation": { "title": "Contamination estimation", "type": "object", - "description": "Options for the estimation of contamination in human data", + "description": "Options for the estimation of contamination", "default": "", "fa_icon": "fas fa-radiation-alt", "properties": { "run_contamination_estimation_angsd": { "type": "boolean", - "description": "Specify to turn on nuclear contamination estimation for genomes with ANGSD.", - "help_text": "Specify to run nuclear DNA contamination estimation with ANGSD.", + "description": "Turn on nuclear contamination estimation for genomes with ANGSD.", + "help_text": "Specify to run the optional processes for nuclear DNA contamination estimation with ANGSD.", "fa_icon": "fas fa-power-off" }, "contamination_estimation_angsd_chrom_name": { "type": "string", "default": "X", - "description": "Specify the name of the chromosome to be used for contamination estimation with ANGSD.", - "help_text": "Specify the name of the chromosome to be used for contamination estimation with ANGSD as specified in your FASTA/BAM header, e.g. 'X' for hs37d5 or 'chrX' for hg19", + "description": "The name of the chromosome to be used for contamination estimation.", + "help_text": "The name of the chromosome as specified in your FASTA/bam header.\ne.g. 
'X' for hs37d5, 'chrX' for HG19", "fa_icon": "fas fa-address-card" }, "contamination_estimation_angsd_range_from": { "type": "integer", "default": 5000000, - "description": "Specify the first position on the chromosome to be used for contamination estimation with ANGSD.", - "help_text": "Specify the beginning of the genetic range that should be utilised for nuclear contamination estimation with ANGSD.", + "description": "The first position on the chromosome to be used for contamination estimation with ANGSD.", + "help_text": "The beginning of the genetic range that should be utilised for nuclear contamination estimation.", "fa_icon": "fas fa-map-marker-alt" }, "contamination_estimation_angsd_range_to": { "type": "integer", "default": 154900000, - "help_text": "Specify the end of the genetic range that should be utilised for nuclear contamination estimation with ANGSD.", - "description": "Specify the last position on the chromosome to be used for contamination estimation with ANGSD.", + "help_text": "The end of the genetic range that should be utilised for nuclear contamination estimation.", + "description": "The last position on the chromosome to be used for contamination estimation with ANGSD.", "fa_icon": "fas fa-map-marker-alt" }, "contamination_estimation_angsd_mapq": { "type": "integer", "default": 30, - "help_text": "Specify the minimum mapping quality reads should have for contamination estimation with ANGSD.\n\n> Modifies ANGSD parameter: `-minMapQ`", + "help_text": "> Modifies angsd parameter: `-minMapQ`", "description": "Specify the minimum mapping quality reads should have for contamination estimation with ANGSD.", - "fa_icon": "fas fa-filter" + "fa_icon": "fas fa-thermometer-full" }, "contamination_estimation_angsd_minq": { "type": "integer", "default": 30, "description": "Specify the minimum base quality reads should have for contamination estimation with ANGSD.", - "help_text": "Specify the minimum base quality reads should have for contamination 
estimation with ANGSD.\n\n> Modifies ANGSD parameter: `-minQ`", - "fa_icon": "fas fa-filter" + "help_text": "> Modifies angsd parameter: `-minQ`", + "fa_icon": "fas fa-ruler-vertical" }, "contamination_estimation_angsd_hapmap": { "type": "string", - "default": "${projectDir}/assets/angsd_resources/HapMapChrX.gz", - "description": "Specify path to HapMap file of chromosome for contamination estimation with ANGSD.", - "help_text": "Specify a path to HapMap file of chromosome for contamination estimation with ANGSD. The haplotype map, or \"HapMap\", records the location of haplotype blocks and their tag SNPs.", + "default": "/Users/judith_ballesteros/Documents/GitHub/eager/assets/angsd_resources/HapMapChrX.gz", + "description": "Path to HapMap file of chromosome for contamination estimation.", + "help_text": "The haplotype map, or \"HapMap\", records the location of haplotype blocks and their tag SNPs.", "fa_icon": "fas fa-map" } } @@ -1332,20 +1329,20 @@ "human_sex_determination": { "title": "Human Sex Determination", "type": "object", - "description": "Options for the calculation of genetic sex of human individuals.", + "description": "Options for the calculation of biological sex of human individuals.", "default": "", "properties": { "run_sexdeterrmine": { "type": "boolean", - "fa_icon": "fas fa-power-off", - "description": "Specify to turn on sex determination for genomes mapped to human reference genomes with Sex.DetERRmine.", - "help_text": "Specify to run genetic sex determination." + "fa_icon": "fas fa-transgender-alt", + "description": "Turn on sex determination for human reference genomes. This will run on single- and double-stranded variants of a library separately.", + "help_text": "Specify to run the optional process of sex determination."
}, "sexdeterrmine_bedfile": { "type": "string", "fa_icon": "fas fa-bed", - "description": "Specify path to SNP panel in BED format for error bar calculation.", - "help_text": "Specify a BED file with SNPs to be used for X-/Y-rate calculation. Running without this parameter will considerably increase runtime, and render the resulting error bars untrustworthy. Theoretically, any set of SNPs that are distant enough that two SNPs are unlikely to be covered by the same read can be used here. The programme was coded with the 1240k panel in mind." + "description": "Specify path to SNP panel in bed format for error bar calculation. Optional (see documentation).", + "help_text": "Specify an optional bedfile of the list of SNPs to be used for X-/Y-rate calculation. Running without this parameter will considerably increase runtime, and render the resulting error bars untrustworthy. Theoretically, any set of SNPs that are distant enough that two SNPs are unlikely to be covered by the same read can be used here. The programme was coded with the 1240K panel in mind." } }, "fa_icon": "fas fa-transgender-alt", From ed9e94845e36dbfbc96dfa55bed05ba6fcb9b11c Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 5 Jul 2024 10:29:37 +0200 Subject: [PATCH 06/61] Revert "fixing schema" This reverts commit 3fac93d0b28b95081ef32d26ca1f6de9aa93c542. --- nextflow_schema.json | 563 ++++++++++++++++++++++--------------------- 1 file changed, 283 insertions(+), 280 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b59b73117..06737bf47 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,14 +19,14 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.(c|t)sv$", "schema": "assets/schema_input.json", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/eager/usage#samplesheet-input).", + "description": "Path to tab- or comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a tab- or comma-separated file with 11 columns, and a header row. See [usage docs](https://nf-co.re/eager/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "convert_inputbam": { "type": "boolean", "description": "Specify to convert input BAM files back to FASTQ for remapping", - "help_text": "This parameter tells the pipeline to convert the BAM files listed in the `--input` TSV or CSV sheet back to FASTQ format to allow re-preprocessing and mapping\n\nCan be useful when you want to ensure consistent mapping parameters across all libraries when incorporating public data, however be careful of biases that may come from re-processing again (the BAM files may already be clipped, or only mapped reads with different settings are included so you may not have all reads from the original publication).", + "help_text": "This parameter tells the pipeline to convert the BAM files listed in the `--input` TSV or CSV sheet back to FASTQ format to allow re-preprocessing and mapping.\n\nCan be useful when you want to ensure consistent mapping parameters across all libraries when incorporating public data, however be careful of biases that may come from re-processing again (the BAM files may already be clipped, or only mapped reads with different settings are included so you may not have all reads from the original publication).", "fa_icon": "fas fa-undo-alt" }, "outdir": { @@ -62,31 +62,32 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "errorMessage": "The path to 
the reference FASTA file must not contain spaces and must have file extensions '.fasta', '.fa', '.fas', '.fna', '.fasta.gz','.fa.gz','.fas.gz' or '.fna.gz'.", - "description": "Path to FASTA genome file.", + "description": "Path to FASTA file of the reference genome.", "help_text": "This parameter is *mandatory* if `--genome` or `--fasta_sheet` are not specified. If you don't supply a mapper index (e.g. for BWA), this will be generated for you automatically. Combine with `--save_reference` to save mapper index for future runs.", "fa_icon": "far fa-file-code" }, "fasta_fai": { "type": "string", - "description": "Path to samtools FASTA index (typically ending in '.fai'). If not supplied will be made for you.", + "description": "Specify path to samtools FASTA index.", "help_text": "If you want to use a pre-existing `samtools faidx` index, use this to specify the required FASTA index file for the selected reference genome. This should be generated by samtools faidx and has a file suffix of `.fai`.", "fa_icon": "fas fa-address-book" }, "fasta_dict": { "type": "string", - "description": "Path to picard sequence dictionary file (typically ending in '.dict'). If not supplied will be made for you.", + "description": "Specify path to Picard sequence dictionary file.", "help_text": "If you want to use a pre-existing `picard CreateSequenceDictionary` dictionary file, use this to specify the required `.dict` file for the selected reference genome.", "fa_icon": "fas fa-address-book" }, "fasta_mapperindexdir": { "type": "string", - "description": "Path to directory containing index files of the FASTA for a given mapper.", - "help_text": "For most people this will likely be the same directory that contains the file you provided to `--fasta`.\n\nIf you want to use pre-existing `bwa index` indices, the directory should contain files ending in '.amb' '.ann' '.bwt'. 
If you want to use pre-existing `bowtie2 build` indices, the directory should contain files ending in'.1.bt2', '.2.bt2', '.rev.1.bt2'. \n\nIn any case do not include the files themselves in the path. nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa index`/`bowtie2 build` file suffixes. If not supplied, the indices will be generated for you.\n\n" + "description": "Specify path to directory containing index files of the FASTA for a given mapper.", + "help_text": "For most people this will likely be the same directory that contains the file you provided to `--fasta`.\n\nIf you want to use pre-existing `bwa index` indices, the directory should contain files ending in '.amb' '.ann' '.bwt'. If you want to use pre-existing `bowtie2 build` indices, the directory should contain files ending in'.1.bt2', '.2.bt2', '.rev.1.bt2'.\n\nIn any case do not include the files themselves in the path. nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa index`/`bowtie2 build` file suffixes. If not supplied, the indices will be generated for you.\n\n", + "fa_icon": "fas fa-folder-open" }, "save_reference": { "type": "boolean", - "description": "Specify to save any pipeline-generated reference genome indices in the results directory.", - "help_text": "Use this if you do not have pre-made reference FASTA indices for `bwa`, `samtools` and `picard`. If you turn this on, the indices nf-core/eager generates for you and will be saved in the `/results/reference_genomes` for you. If not supplied, nf-core/eager generated index references will be deleted.\n\n> modifies SAMtools index command: `-c`", + "description": "Specify to save any pipeline-generated reference genome indices in the results directory.", + "help_text": "Use this if you do not have pre-made reference FASTA indices for `bwa`, `samtools` and `picard`. 
If you turn this on, the indices nf-core/eager generates for you and will be saved in the `/results/reference_genomes` for you. If not supplied, nf-core/eager generated index references will be deleted.\n\n> Modifies SAMtools index command: `-c`", "fa_icon": "fas fa-save" }, "fasta_sheet": { @@ -125,8 +126,8 @@ }, "fasta_circular_target": { "type": "string", - "description": "Specify the FASTA header of the target chromosome to extend. Only applies when using `circularmapper`.", - "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`\n", + "description": "Specify the FASTA header of the target chromosome to extend when using `circularmapper`.", + "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`", "fa_icon": "fas fa-bullseye" } } @@ -335,7 +336,7 @@ "preprocessing": { "title": "Preprocessing", "type": "object", - "description": "Removal of adapters, paired-end merging, poly-G removal etc.", + "description": "Removal of adapters, paired-end merging, poly-G removal, etc.", "default": "", "properties": { "sequencing_qc_tool": { @@ -348,8 +349,8 @@ }, "skip_preprocessing": { "type": "boolean", - "description": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-g trimming etc).", - "help_text": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-g trimming etc).\n\nThis will also mean you will only get one set of FastQC results (of the input reads).", + "description": "Specify to skip all preprocessing steps 
(adapter removal, paired-end merging, poly-G trimming, etc).", + "help_text": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-G trimming etc).\n\nThis will also mean you will only get one set of FastQC results (of the input reads).", "fa_icon": "fas fa-forward" }, "preprocessing_tool": { @@ -364,18 +365,18 @@ "type": "boolean", "description": "Specify to skip read-pair merging.", "fa_icon": "fas fa-forward", - "help_text": "Turns off the paired-end read merging, and will result in paired-end mapping modes being used during reference of reads again alignment.\n\nThis can be useful in cases where you have long ancient DNA reads, modern DNA, or when you want to utilise mate-pair 'spatial' information..\n\n\u26a0\ufe0f If you run this and also with --preprocessing_minlength set to a value (as is by default!), you may end up removing single reads from either the pair1 or pair2 file. These reads will be NOT be mapped when aligning with either bwa or bowtie, as both can only accept one (forward) or two (forward and reverse) FASTQs as input in paired-end mode.\n\n> \u26a0\ufe0f If you run metagenomic screening as well as skipping merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies AdapterRemoval parameter: `--collapse`\n> Modifies fastp parameter: `--merge`" + "help_text": "Turns off the paired-end read merging, and will result in paired-end mapping modes being used during reference of reads again alignment.\n\nThis can be useful in cases where you have long ancient DNA reads, modern DNA or when you want to utilise mate-pair 'spatial' information.\n\n ⚠️ If you run this with --preprocessing_minlength set to a value (as is by default!), you may end up removing single reads from either the pair1 or pair2 file. 
These reads will NOT be mapped when aligning with either BWA or bowtie, as both can only accept one (forward) or two (forward and reverse) FASTQs as input in paired-end mode.\n\n> ⚠️ If you run metagenomic screening as well as skipping merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies AdapterRemoval parameter: `--collapse`\n> Modifies fastp parameter: `--merge`" }, "preprocessing_excludeunmerged": { "type": "boolean", - "description": "Specify to exclude pairs that did not overlap sufficiently for merging (i.e., keep merged reads only).", + "description": "Specify to exclude read-pairs that did not overlap sufficiently for merging (i.e., keep merged reads only).", "fa_icon": "fas fa-trash-alt", - "help_text": "Specify to exclude pairs that did not overlap sufficiently for merging (i.e., keep merged reads only), in otherwords singletons (i.e. reads missing a pair), or un-merged reads (where there wasn't sufficient overlap) are discarded.\n\nMost ancient DNA molecules are very short, and the majority are expected to merge. Specifying this parameter can sometimes be useful when dealing with ultra-short aDNA reads to reduce the number of longer-reads you may have in your library that are derived from modern contamination. It can also speed up run time of mapping steps.\n\nYou may want to use this if you want ensure only the best quality reads for your analysis, but with the penalty of potentially losing still valid data (even if some reads have slightly lower quality and/or are longer). It is highly recommended when using 'dedup' deduplication tool." + "help_text": "Specify to exclude read-pairs that did not overlap sufficiently for merging (i.e., keep merged reads only). Singletons (i.e. 
reads missing a pair) or un-merged reads (where there wasn't sufficient overlap) are discarded.\n\nMost ancient DNA molecules are very short, and the majority are expected to merge. Specifying this parameter can sometimes be useful when dealing with ultra-short aDNA reads to reduce the number of longer-reads you may have in your library that are derived from modern contamination. It can also speed up run time of mapping steps.\n\nYou may want to use this if you want to ensure only the best quality reads for your analysis, but with the penalty of potentially losing still valid data (even if some reads have slightly lower quality and/or are longer). It is highly recommended when using 'dedup' deduplication tool." }, "preprocessing_skipadaptertrim": { "type": "boolean", "description": "Specify to skip removal of adapters.", - "help_text": "Specify to turn off trimming of adapters from reads.\n\nYou may wish to do this if you are using public data (e.g. ENA, SRA), that _should_ have all library artefacts from reads.\n\nThis will override any other adapter parameters provided (i.e, `--preprocessing_adapterlist` and or/ `--preprocessing_adapter{1,2}` will be ignored)!\n\n> Modifies AdapterRemoval parameter: `--adapter1` and `--adapter2` (sets both to an empty string)\n> Applies fastp parameter: `--disable_adapter_trimming`", + "help_text": "Specify to turn off trimming of adapters from reads.\n\nYou may wish to do this if you are using publicly available data, that _should_ have all library artefacts from reads removed.\n\nThis will override any other adapter parameters provided (i.e., `--preprocessing_adapterlist` and `--preprocessing_adapter{1,2}` will be ignored)!\n\n> Modifies AdapterRemoval parameter: `--adapter1` and `--adapter2` (sets both to an empty string)\n> Applies fastp parameter: `--disable_adapter_trimming`", "fa_icon": "fas fa-forward" }, "preprocessing_adapter1": { @@ -386,93 +387,93 @@ }, "preprocessing_adapter2": { "type": "string", - "description": 
"Specify the nucleotide sequence for the forward read/R2.", + "description": "Specify the nucleotide sequence for the reverse read/R2.", "fa_icon": "fas fa-grip-lines", "help_text": "Specify a nucleotide sequence for the forward read/R2.\n\nIf not modified by the user, the default for the particular preprocessing tool will be used. To turn off adapter trimming use `--preprocessing_skipadaptertrim`.\n\n> Modifies AdapterRemoval parameter: `--adapter2`\n> Modifies fastp parameter: `--adapter_sequence_r2`" }, "preprocessing_adapterlist": { "type": "string", - "description": "Specify a list of all possible adapters to trim. Overrides --preprocessing_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).", - "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the `--preprocessing_adapter1`/`--preprocessing_adapter2` parameters . \n\nNote that the two tools have slightly different behaviours.\n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possibly combinations, one per line, and this list is applied to all files. Only Adapters in this list will be screened for and removed. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. 
fastp first will perform auto-detection of reads and will be removed , and then _additionally_ adapters present in the FASTA file one by one will be removed.\n\n> Modifies AdapterRemoval parameter: `--adapter-list`\n> Modifies fastp parameter: `--adapter_fasta`", + "description": "Specify a list of all possible adapters to trim.", + "help_text": "Specify a file with a list of adapter (combinations) to remove from all files.\n\nOverrides the `--preprocessing_adapter1`/`--preprocessing_adapter2` parameters.\n\nNote that the two tools have slightly different behaviours.\n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. Only Adapters in this list will be screened for and removed. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp will first perform auto-detection and removal of adapters and then _additionally_ remove adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: `--adapter-list`\n> Modifies fastp parameter: `--adapter_fasta`", "fa_icon": "fas fa-list" }, "preprocessing_minlength": { "type": "integer", "default": 25, "description": "Specify the minimum length reads must have to be retained.", - "help_text": "Specify the minimum length reads must have to be retained. \n\nReads smaller than this length after trimming are discarded and not included in downstream analyses. 
Typically in ancient DNA users will set this to 30 or for very old samples around 25 bp - reads any shorter that this often are not specific enough to provide useful information.\n\n> Modifies AdapterRemoval parameter: `--minlength`\n> Modifies fastp parameter: `--length_required`", - "fa_icon": "fas fa-ruler" + "help_text": "Specify the minimum length reads must have to be retained.\n\nReads smaller than this length after trimming are discarded and not included in downstream analyses. Typically in ancient DNA, users will set this to 30 or for very old samples around 25 bp - reads any shorter than this often are not specific enough to provide useful information.\n\n> Modifies AdapterRemoval parameter: `--minlength`\n> Modifies fastp parameter: `--length_required`", + "fa_icon": "fas fa-ruler-horizontal" }, "preprocessing_trim5p": { "type": "integer", "default": 0, - "description": "Specify number of bases to hard-trim from 5 prime or front of reads. Exact behaviour varies per tool, see documentation.", - "help_text": "Specify number of bases to hard-trim from 5 prime or front of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n\u26a0\ufe0f when this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: this 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie2's inbuilt pre-mapping read-end trimming functionality. 
Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 5p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. Therefore this is more suitable for hard-removal of damage prior mapping (however the Bowtie2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim5p`\n> Modifies fastp parameters: `--trim_front1` and/or `--trim_front2`\n", + "description": "Specify number of bases to hard-trim from 5 prime or front of reads.", + "help_text": "Specify number of bases to hard-trim from 5 prime or front of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n ⚠️ When this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior to mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie 2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 5p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. 
Therefore, this is more suitable for hard-removal of damage before mapping (however the Bowtie 2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim5p`\n> Modifies fastp parameters: `--trim_front1` and/or `--trim_front2`", "fa_icon": "fas fa-cut" }, "preprocessing_trim3p": { "type": "integer", "default": 0, - "description": "Specify number of bases to hard-trim from 3 prime or tail of reads. Exact behaviour varies per tool, see documentation.", + "description": "Specify number of bases to hard-trim from 3 prime or tail of reads.", "fa_icon": "fas fa-cut", - "help_text": "Specify number of bases to hard-trim from 3 prime or tail of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n\u26a0\ufe0f when this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: this 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 3p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. 
Therefore this is more suitable for hard-removal of damage prior mapping (however the Bowtie2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim3p`\n> Modifies fastp parameters: `--trim_tail1` and/or `--trim_tail2`\n" + "help_text": "Specify number of bases to hard-trim from 3 prime or tail of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n⚠️ When this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior to mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie 2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 3p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. Therefore this is more suitable for hard-removal of damage before mapping (however the Bowtie 2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim3p`\n> Modifies fastp parameters: `--trim_tail1` and/or `--trim_tail2`" }, "preprocessing_savepreprocessedreads": { "type": "boolean", "description": "Specify to save the preprocessed reads in the results directory.", "fa_icon": "fas fa-save", - "help_text": "Specify to save the preprocessed reads in FASTQ format the results directory. 
\n\nThis can be useful for re-analysing in FASTQ files manually, or uploading to public data repositories such as ENA/SRA (provided you don't do length filtering nor merging)." + "help_text": "Specify to save the preprocessed reads in FASTQ format in the results directory.\n\nThis can be useful for re-analysing FASTQ files manually, or uploading to public data repositories such as ENA/SRA (provided you don't filter by length or merge paired reads)." }, "preprocessing_fastp_complexityfilter": { "type": "boolean", - "description": "Specify to turn on sequence complexity filtering of reads with fastp.", + "description": "Specify to turn on sequence complexity filtering of reads.", "help_text": "Performs a poly-G tail removal step in the beginning of the pipeline using fastp.\n\nThis can be useful for trimming ploy-G tails from short-fragments sequenced on two-colour Illumina chemistry such as NextSeqs or NovaSeqs (where no-fluorescence is read as a G on two-colour chemistry), which can inflate reported GC content values.\n\n> Modifies fastp parameter: `--trim_poly_g`", - "fa_icon": "fas fa-cut" + "fa_icon": "fas fa-power-off" }, "preprocessing_fastp_complexityfilter_threshold": { "type": "integer", "default": 10, "description": "Specify the complexity threshold that must be reached or exceeded to retain reads.", "help_text": "This option can be used to define the minimum length of a poly-G tail to begin low complexity trimming.\n\n> Modifies fastp parameter: `--poly_g_min_len`", - "fa_icon": "fas fa-ruler" + "fa_icon": "fas fa-filter" }, "preprocessing_adapterremoval_preserve5p": { "type": "boolean", - "description": "Skip AdapterRemoval base trimming (n, quality) of 5 prime end.", + "description": "Skip AdapterRemoval quality and N base trimming at 5 prime end.", "help_text": "Turns off quality based trimming at the 5p end of reads when any of the AdapterRemoval quality or N trimming options are used. 
Only 3p end of reads will be removed.\n\nThis also entirely disables quality based trimming of collapsed reads, since both ends of these are informative for PCR duplicate filtering. For more information see the AdapterRemoval [documentation](https://adapterremoval.readthedocs.io/en/stable/manpage.html#cmdoption-adapterremoval-preserve5p).\n\n> Modifies AdapterRemoval parameters: `--preserve5p`", "fa_icon": "fas fa-shield-alt" }, "preprocessing_adapterremoval_skipqualitytrimming": { "type": "boolean", - "description": "Skip AdapterRemoval quality and N trimming from ends of reads.", - "help_text": "Turns off AdapterRemoval quality trimming from ends of reads. \n\nThis can be useful to reduce runtime when running public data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimqualities` ", + "description": "Specify to skip AdapterRemoval quality and N trimming at the ends of reads.", + "help_text": "Turns off AdapterRemoval quality trimming from ends of reads.\n\nThis can be useful to reduce runtime when running public data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimqualities` ", "fa_icon": "fas fa-forward" }, "preprocessing_adapterremoval_trimbasequalitymin": { "type": "integer", "default": 20, "description": "Specify AdapterRemoval minimum base quality for trimming off bases.", - "help_text": "Defines the minimum read quality per base that is required for a base to be kept. Individual bases at the ends of reads falling below this threshold will be clipped off.\n\n> Modifies AdapterRemoval parameter: `--minquality`", - "fa_icon": "fas fa-ruler-vertical" + "help_text": "Defines the minimum read quality per base that is required for a base to be kept by AdapterRemoval. 
Individual bases at the ends of reads falling below this threshold will be clipped off.\n\n> Modifies AdapterRemoval parameter: `--minquality`", + "fa_icon": "fas fa-filter" }, "preprocessing_adapterremoval_skipntrimming": { "type": "boolean", - "description": "Skip AdapterRemoval N trimming (quality trimming only).", - "help_text": "Turns off AdapterRemoval N trimming from ends of reads. \n\nThis can be useful to reduce runtime when running public data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimns` ", + "description": "Specify to skip AdapterRemoval N trimming (quality trimming only).", + "help_text": "Turns off AdapterRemoval N trimming from ends of reads.\n\nThis can be useful to reduce runtime when running publicly available data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimns` ", "fa_icon": "fas fa-forward" }, "preprocessing_adapterremoval_adapteroverlap": { "type": "integer", "default": 1, "description": "Specify the AdapterRemoval minimum adapter overlap required for trimming.", - "fa_icon": "fas fa-ruler-horizontal", + "fa_icon": "fas fa-filter", "help_text": "Specifies a minimum number of bases that overlap with the adapter sequence before AdapterRemoval trims adapters sequences from reads.\n\n> Modifies AdapterRemoval parameter: `--minadapteroverlap`" }, "preprocessing_adapterremoval_qualitymax": { "type": "integer", "default": 41, - "description": "Specify the AdapterRemoval maximum Phred score used in input FASTQ files", - "help_text": "Specify maximum Phred score of the quality field of FASTQ files. \n\nThe quality-score range can vary depending on the machine and version (e.g. see diagram [here](https://en.wikipedia.org/wiki/FASTQ_format#Encoding), and this allows you to increase from the default AdapterRemoval value of 41. 
\n\nNote that while this theoretically can provide you with more confident and precise base call information, many downstream tools only accept FASTQ files with Phred scores limited to a max of 41, and therefore increasing the default for this parameter may make the resulting preprocessed files incompatible with some downstream tools.\n\n> Modifies AdapterRemoval parameters: `--qualitymax`", + "description": "Specify the AdapterRemoval maximum Phred score used in input FASTQ files.", + "help_text": "Specify maximum Phred score of the quality field of FASTQ files.\n\nThe quality-score range can vary depending on the machine and version (e.g. see diagram [here](https://en.wikipedia.org/wiki/FASTQ_format#Encoding)), and this allows you to increase from the default AdapterRemoval value of 41.\n\nNote that while this can theoretically provide you with more confident and precise base call information, many downstream tools only accept FASTQ files with Phred scores limited to a max of 41, and therefore increasing the default for this parameter may make the resulting preprocessed files incompatible with some downstream tools.\n\n> Modifies AdapterRemoval parameters: `--qualitymax`", "fa_icon": "fas fa-tachometer-alt" } }, @@ -486,9 +487,9 @@ "properties": { "run_fastq_sharding": { "type": "boolean", - "description": "Turn on FastQ sharding.", + "description": "Specify to turn on FASTQ sharding.", "fa_icon": "fas fa-power-off", - "help_text": "Sharding will split the FastQs into smaller chunks before mapping. These chunks are then mapped in parallel. This approach can speed up the mapping process for larger FastQ files." + "help_text": "Sharding will split the FASTQs into smaller chunks before mapping. These chunks are then mapped in parallel. This approach can speed up the mapping process for larger FASTQ files." 
}, "fastq_shard_size": { "type": "integer", @@ -502,99 +503,99 @@ "default": "bwaaln", "enum": ["bwaaln", "bwamem", "bowtie2", "circularmapper"], "description": "Specify which mapper to use.", - "help_text": "Specify which mapping tool to use. Options are BWA aln ('`bwaaln`'), BWA mem ('`bwamem`'), circularmapper ('`circularmapper`'), or bowtie2 ('`bowtie2`'). BWA aln is the default and highly suited for short-read ancient DNA. BWA mem can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. CircularMapper enhances the mapping procedure to circular references, using the BWA algorithm but utilizing a extend-remap procedure (see Peltzer et al 2016, Genome Biology for details). Bowtie2 is similar to BWA aln, and has recently been suggested to provide slightly better results under certain conditions ([Poullet and Orlando 2020](https://doi.org/10.3389/fevo.2020.00105)), as well as providing extra functionality (such as FASTQ trimming).\n\nMore documentation can be seen for each tool under:\n\n- [BWA aln](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [BWA mem](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [CircularMapper](https://circularmapper.readthedocs.io/en/latest/contents/userguide.html)\n- [Bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line)", - "fa_icon": "fas fa-layer-group" + "help_text": "Specify which mapping tool to use. Options are BWA aln ('`bwaaln`'), BWA mem ('`bwamem`'), circularmapper ('`circularmapper`'), or Bowtie 2 ('`bowtie2`'). BWA aln is the default and highly suited for short-read ancient DNA. BWA mem can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. CircularMapper enhances the mapping procedure to circular references, using the BWA algorithm but utilizing an extend-remap procedure (see [Peltzer et al 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). 
Bowtie 2 is similar to BWA aln, and has recently been suggested to provide slightly better results under certain conditions ([Poullet and Orlando 2020](https://doi.org/10.3389/fevo.2020.00105)), as well as providing extra functionality (such as FASTQ trimming).\n\nMore documentation can be seen for each tool under:\n\n- [BWA aln](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [BWA mem](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [CircularMapper](https://circularmapper.readthedocs.io/en/latest/contents/userguide.html)\n- [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line)", + "fa_icon": "fas fa-hammer" }, "fasta_largeref": { "type": "boolean", - "description": "Specify to generate more recent '.csi' BAM indices. If your reference genome is larger than 3.5GB, this is recommended due to more efficient data handling with the '.csi' format over the older '.bai'.", - "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). This parameter is not required for smaller references (including the human hg19 or grch37/grch38 references), but >4GB genomes have been shown to need `.csi` indices.", + "description": "Specify to generate '.csi' BAM indices instead of '.bai' for larger reference genomes.", + "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). 
This parameter is not required for smaller references (including the human reference genomes hg19 or grch37/grch38).", "fa_icon": "fas fa-address-book" }, "mapping_bwaaln_n": { "type": "number", "default": 0.01, - "description": "Specify the -n parameter for BWA aln, i.e. amount of allowed mismatches in the alignment.", - "help_text": "Configures the `bwa aln -n` parameter, defining how many mismatches are allowed in a read. Default is set following recommendations from [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who tested when aligning to human reference genomes. \n\nIf you're uncertain what to set check out this [Shiny App](https://apeltzer.shinyapps.io/bwa-mismatches/) for more information on how to set this parameter efficiently.\n\n> Modifies bwa aln parameter: `-n`", + "description": "Specify the amount of allowed mismatches in the alignment for mapping with BWA aln.", + "help_text": "Specify how many mismatches are allowed in a read during alignment with BWA aln. Default is set following recommendations from [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who compared alignment to human reference genomes.\n\nIf you're uncertain what value to use, check out this [Shiny App](https://apeltzer.shinyapps.io/bwa-mismatches/) for more information.\n\n> Modifies BWA aln parameter: `-n`", "fa_icon": "fas fa-sort-numeric-down" }, "mapping_bwaaln_k": { "type": "integer", "default": 2, - "description": "Specify the -k parameter for BWA aln, i.e. 
maximum edit distance allowed in a seed.", - "help_text": "Configures the bwa aln `-k` parameter for the maximum edit distance during the seeding phase of the mapping algorithm.\n\n> Modifies BWA aln parameter: `-k`", + "description": "Specify the maximum edit distance allowed in a seed for mapping with BWA aln.", + "help_text": "Specify the maximum edit distance during the seeding phase of the BWA aln mapping algorithm.\n\n> Modifies BWA aln parameter: `-k`", "fa_icon": "fas fa-people-arrows" }, "mapping_bwaaln_l": { "type": "integer", "default": 1024, - "description": "Specify the -l parameter for BWA aln i.e. the length of seeds to be used.", - "help_text": "Configures the length of the seed used in bwa aln `-l`. Default is set to be 'turned off' at the recommendation of [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who tested when aligning to human reference genomes. Seeding is 'turned off' by specifying an arbitrarily long number to force the entire read to act as the seed. \n\nNote: Despite being recommended, turning off seeding can result in long runtimes!\n\n> Modifies BWA aln parameter: `-l`", + "description": "Specify the length of seeds to be used for BWA aln.", + "help_text": "Specify the length of the seed used in BWA aln. Default is set to be 'turned off' at the recommendation of [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who tested when aligning to human reference genomes. Seeding is 'turned off' by specifying an arbitrarily long number to force the entire read to act as the seed.\n\nNote: Despite being recommended, turning off seeding can result in long runtimes!\n\n> Modifies BWA aln parameter: `-l`", "fa_icon": "fas fa-ruler-horizontal" }, "mapping_bwaaln_o": { "type": "integer", "default": 2, - "description": "Specify the -o parameter for BWA aln i.e. the number of gaps allowed.", - "help_text": "Configures the number of gaps used in bwa aln. 
Default is set to bwa default.\n\n> Modifies BWA aln parameter: `-o`", + "description": "Specify the number of gaps allowed for alignment with BWA aln.", + "help_text": "Specify the number of gaps allowed for mapping with BWA aln. Default is set to BWA default.\n\n> Modifies BWA aln parameter: `-o`", "fa_icon": "fas fa-people-arrows" }, "mapping_bwamem_k": { "type": "integer", "default": 19, - "description": "Specify the -k parameter for BWA mem i.e. the minimum seed length", - "help_text": "Configures the minimum seed length used in BWA-MEM. Default is set to BWA default.\n\n> Modifies BWA-MEM parameter: `-k`", + "description": "Specify the minimum seed length for alignment with BWA mem.", + "help_text": "Configures the minimum seed length used in BWA mem. Default is set to BWA default.\n\n> Modifies BWA mem parameter: `-k`", "fa_icon": "fas fa-seedling" }, "mapping_bwamem_r": { "type": "number", "default": 1.5, - "description": "Specify the -k parameter for BWA mem i.e. the minimum seed length", - "help_text": "Configures the re-seeding used in BWA-MEM. Default is set to BWA default.\n\n> Modifies BWA-MEM parameter: `-r`", + "description": "Specify the re-seeding threshold for alignment with BWA mem.", + "help_text": "Configures the re-seeding threshold used in BWA mem. Default is set to BWA default.\n\n> Modifies BWA mem parameter: `-r`", "fa_icon": "fas fa-angle-double-left" }, "mapping_bowtie2_alignmode": { "type": "string", "default": "local", - "description": "Specify the bowtie2 alignment mode.", - "help_text": "The type of read alignment to use. Local allows only partial alignment of read, with ends of reads possibly 'soft-clipped' (i.e. remain unaligned/ignored), if the soft-clipped alignment provides best alignment score. End-to-end requires all nucleotides to be aligned. 
\nDefault is set following [Cahill et al (2018)](https://doi.org/10.1093/molbev/msy018) and [Poullet and Orlando 2020](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full)\n\n> Modifies Bowtie2 presets: `--local`, `--end-to-end`", - "fa_icon": "fas fa-arrows-alt-h", + "description": "Specify the Bowtie 2 alignment mode.", + "help_text": "Specify the type of read alignment to use with Bowtie 2. 'Local' allows only partial alignment of read with ends of reads possibly 'soft-clipped' (i.e. remain unaligned/ignored), if the soft-clipped alignment provides best alignment score. 'End-to-end' requires all nucleotides to be aligned.\nDefault is set following [Cahill et al (2018)](https://doi.org/10.1093/molbev/msy018) and [Poullet and Orlando 2020](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full)\n\n> Modifies Bowtie 2 presets: `--local`, `--end-to-end`", + "fa_icon": "fas fa-toggle-on", "enum": ["local", "end-to-end"] }, "mapping_bowtie2_sensitivity": { "type": "string", "default": "sensitive", - "description": "Specify the level of sensitivity for the bowtie2 alignment mode.", - "help_text": "The Bowtie2 'preset' to use. These strings apply to both --mapping_bowtie2_alignmode options. See the Bowtie2 manual for actual settings. \nDefault is set following [Poullet and Orlando (2020)](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full), when running damaged-data without UDG treatment)\n\nModifies the Bowtie2 parameters: `--fast`, `--very-fast`, `--sensitive`, `--very-sensitive`, `--fast-local`, `--very-fast-local`, `--sensitive-local`, `--very-sensitive-local`", + "description": "Specify the level of sensitivity for the Bowtie 2 alignment mode.", + "help_text": "Specify the Bowtie 2 'preset' to use. These strings apply to both `--mapping_bowtie2_alignmode` options. 
See the Bowtie 2 manual for actual settings.\nDefault is set following [Poullet and Orlando (2020)](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full), when running damaged-data without UDG treatment.\n\n> Modifies the Bowtie 2 parameters: `--fast`, `--very-fast`, `--sensitive`, `--very-sensitive`, `--fast-local`, `--very-fast-local`, `--sensitive-local`, `--very-sensitive-local`", "fa_icon": "fas fa-microscope", "enum": ["fast", "very-fast", "sensitive", "very-sensitive"] }, "mapping_bowtie2_n": { "type": "integer", "default": 0, - "description": "Specify the -N parameter for bowtie2 (mismatches in seed). This will override defaults from alignmode/sensitivity.", - "help_text": "The number of mismatches allowed in the seed during seed-and-extend procedure of Bowtie2. This will override any values set with --mapping_bowtie2_sensitivity. Can either be 0 or 1.\n\n>Modifies Bowtie2 parameter: `-N`", + "description": "Specify the number of mismatches in seed for alignment with Bowtie 2.", + "help_text": "Specify the number of mismatches allowed in the seed during seed-and-extend procedure of Bowtie 2. This will override any values set with `--mapping_bowtie2_sensitivity`. Can either be 0 or 1.\n\n> Modifies Bowtie 2 parameter: `-N`", "fa_icon": "fas fa-sort-numeric-down" }, "mapping_bowtie2_l": { "type": "integer", "default": 20, - "description": "Specify the -L parameter for bowtie2 (length of seed substrings). This will override defaults from alignmode/sensitivity.", - "help_text": "The length of the seed sub-string to use during seeding. This will override any values set with --mapping_bowtie2_sensitivity.\n\n> Modifies Bowtie2 parameter: `-L`", + "description": "Specify the length of seed substrings for Bowtie 2.", + "help_text": "Specify the length of the seed sub-string to use during seeding of Bowtie 2. 
This will override any values set with `--mapping_bowtie2_sensitivity`.\n\n> Modifies Bowtie 2 parameter: `-L`", "fa_icon": "fas fa-ruler-horizontal" }, "mapping_bowtie2_trim5": { "type": "integer", "default": 0, - "description": "Specify number of bases to trim off from 5' (left) end of read before alignment.", - "help_text": "Number of bases to trim at the 5' (left) end of read prior alignment. Maybe useful when left-over sequencing artefacts of in-line barcodes present.\n\n> Modifies Bowtie2 parameter: `--trim5`", + "description": "Specify the number of bases to trim off from 5 prime end of read before alignment with Bowtie 2.", + "help_text": "Specify the number of bases to trim at the 5' (left) end of read before alignment with Bowtie 2. This may be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie 2 parameter: `--trim5`", "fa_icon": "fas fa-cut" }, "mapping_bowtie2_trim3": { "type": "integer", "default": 0, - "description": "Specify number of bases to trim off from 3' (right) end of read before alignment.", - "help_text": "Number of bases to trim at the 3' (right) end of read prior alignment. Maybe useful when left-over sequencing artefacts of in-line barcodes present.\n\n> Modifies Bowtie2 parameter: `--trim3`", + "description": "Specify the number of bases to trim off from 3 prime end of read before alignment with Bowtie 2.", + "help_text": "Specify the number of bases to trim at the 3' (right) end of read before alignment with Bowtie 2. This may be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie 2 parameter: `--trim3`", "fa_icon": "fas fa-cut" }, "mapping_bowtie2_maxins": { @@ -603,7 +604,13 @@ "description": "Specify the maximum fragment length for Bowtie2 paired-end mapping mode only.", "help_text": "The maximum fragment for valid paired-end alignments. Only for paired-end mapping (i.e. 
unmerged), and therefore typically only useful for modern data.\n\n> Modifies Bowtie2 parameter: `--maxins`", "fa_icon": "fas fa-exchange-alt" - } + }, + "elongation_factor": { + "type": "integer", + "default": 500, + "description": "Specify the number of bases to extend reference by (circularmapper only)", + "help_text": "The number of bases to extend the reference genome with. By default this is set to 500 if not specified otherwise.", + "fa_icon": "fas fa-external-link-alt" }, "fa_icon": "fas fa-layer-group" }, @@ -615,54 +622,54 @@ "properties": { "run_bamfiltering": { "type": "boolean", - "description": "Turn on filtering of reads in BAM files after mapping. By default, only mapped reads retained.", + "description": "Specify to turn on filtering of reads in BAM files after mapping. By default, only mapped reads retained.", "fa_icon": "fas fa-power-off", - "help_text": "Turns on the filtering subworkflow for mapped BAM files coming out of the read alignment step. Filtering includes removal of unmapped reads, length filtering, and mapping quality filtering.\n\nWhen turning on bam filtering, by default only the mapped/unmapped filter is activated, thus only mapped reads are retained for downstream analyses. See `--bamfiltering_retainunmappedgenomicbam` to retain unmapped reads, if filtering only for length and/or quality is preferred.\n\nNote this subworkflow can also be activated if `--run_metagenomic_screening` is supplied." + "help_text": "Specify to turns on the filtering subworkflow for mapped BAM files after the read alignment step. Filtering includes removal of unmapped reads, length filtering, and mapping quality filtering.\n\nWhen turning on BAM filtering, by default only the mapped/unmapped filter is activated, thus only mapped reads are retained for downstream analyses. 
See `--bamfiltering_retainunmappedgenomicbam` to retain unmapped reads, if filtering only for length and/or quality is preferred.\n\nNote this subworkflow can also be activated if `--run_metagenomicscreening` is supplied." }, "bamfiltering_minreadlength": { "type": "integer", "default": 0, "description": "Specify the minimum read length mapped reads should have for downstream genomic analysis.", - "help_text": "You can use this to remove mapped reads that fall below a certain length after mapping.\n\nThis can be useful to get more realistic 'endogenous DNA' or 'on target read' percentages.\n\nIf used _instead of_ minimum length read filtering at AdapterRemoval, you can get more more realistic endogenous DNA estimates when most of your reads are very short (e.g. in single-stranded libraries or samples with highly degraded DNA). In these cases, the default minimum length filter at earlier adapter clipping/read merging will remove a very large amount of your reads in your library (including valid reads), thus making an artificially small denominator for a typical endogenous DNA calculation. \n\nTherefore by retaining all of your reads until _after_ mapping (i.e., turning off the adapter clipping/read merging filter), you can generate more 'real' endogenous DNA estimates immediately after mapping (with a better denominator). Then after estimating this, filter using this parameter to retain only 'useful' reads (i.e., those long enough to provide higher confidence of their mapped position) for downstream analyses.\n\nBy specifying `0`, no length filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). 
See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies tool parameter(s):\n> - filter_bam_fragment_length.py: `-l`", - "fa_icon": "fas fa-ruler-horizontal" + "help_text": "Specify to remove mapped reads that fall below a certain length threshold after mapping.\n\nThis can be useful to get more realistic 'endogenous DNA' or 'on target read' percentages.\n\nIf used _instead of_ minimum length read filtering at AdapterRemoval, you can get more realistic endogenous DNA estimates when most of your reads are very short (e.g. in single-stranded libraries or samples with highly degraded DNA). In these cases, the default minimum length filter at earlier adapter clipping/read merging will remove a very large amount of your reads in your library (including valid reads), thus making an artificially small denominator for a typical endogenous DNA calculation.\n\nTherefore by retaining all of your reads until _after_ mapping (i.e., turning off the adapter clipping/read merging filter), you can generate more 'real' endogenous DNA estimates immediately after mapping (with a better denominator). Then after estimating this, filter using this parameter to retain only 'useful' reads (i.e., those long enough to provide higher confidence of their mapped position) for downstream analyses.\n\nBy specifying `0`, no length filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). 
See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies filter_bam_fragment_length.py parameter: `-l`", + "fa_icon": "fas fa-filter" }, "bamfiltering_mappingquality": { "type": "integer", "default": 0, "description": "Specify the minimum mapping quality reads should have for downstream genomic analysis.", - "help_text": "Specify a mapping quality threshold for mapped reads to be kept for downstream analysis.\n\nBy default all reads are retained and is therefore set to 0 to ensure no quality filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies tool parameter(s):\n> - samtools view `-q`", - "fa_icon": "fas fa-thermometer-full" + "help_text": "Specify a mapping quality threshold for mapped reads to be kept for downstream analysis.\n\nBy default all reads are retained and this option is therefore set to 0 to ensure no quality filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies samtools view parameter: `-q`", + "fa_icon": "fas fa-filter" }, "bamfilter_genomicbamfilterflag": { "type": "integer", "default": 4, "fa_icon": "fas fa-flag", - "description": "Specify the SAM format flag of reads to remove during BAM filtering for downstream genomic steps. 
Generally not recommended to change.", - "help_text": "You can use this to customise the exact SAM format flag of reads you wish to _remove_ from your BAM file to for downstream _genomic_ analyses.\n\nYou can explore more using a tool from the Broad institute [here](https://broadinstitute.github.io/picard/explain-flags.html)\n\n> \u26a0\ufe0f Modify at your own risk, alternative flags are not necessarily supported in downstream steps!\n\n> Modifies tool parameter(s):\n> - SAMtools: `-F`" + "description": "Specify the SAM format flag of reads to remove during BAM filtering for downstream genomic steps.", + "help_text": "Specify to customise the exact SAM format flag of reads you wish to _remove_ from your BAM file for downstream _genomic_ analyses.\n\nYou can explore more using a tool from the Broad Institute [here](https://broadinstitute.github.io/picard/explain-flags.html)\n\n> ⚠️ Modify at your own risk, alternative flags are not necessarily supported in downstream steps!\n\n> Modifies samtools parameter: `-F`" }, "bamfiltering_retainunmappedgenomicbam": { "type": "boolean", "description": "Specify to retain unmapped reads in the BAM file used for downstream genomic analyses.", - "help_text": "You can use this parameter to retain unmapped reads (optionally also length filtered) in the genomic BAM for downstream analysis. By default, the pipeline only keeps mapped reads for downstream analysis.\n\nThis is also turned on if `--metagenomicscreening_input` is set to `all`.\n\n> \u26a0\ufe0f This will likely slow down run time of downstream pipeline steps!\n\n> Modifies tool parameter(s):\n> - samtools view: `-f 4` / `-F 4`", + "help_text": "Specify to retain unmapped reads (optionally also length-filtered) in the genomic BAM for downstream analysis. 
By default, the pipeline only keeps mapped reads for downstream analysis.\n\nThis is also turned on if `--metagenomicscreening_input` is set to `all`.\n\n> ⚠️ This will likely slow down run time of downstream pipeline steps!\n\n> Modifies samtools view parameters: `-f 4` / `-F 4`", "fa_icon": "fas fa-piggy-bank" }, "bamfiltering_generateunmappedfastq": { "type": "boolean", - "description": "Generate FASTQ files containing only unmapped reads from the aligner generated BAM files.", - "help_text": "This turns on the generation and saving of FASTQs of only the unmapped reads from the mapping step in the results directory, using `samtools fastq`.\n\nThis could be useful if you wish to do other analysis of the unmapped reads independently of the pipeline.\n\nNote: the reads in these FASTQ files have _not_ undergone length of quality filtering\n\n> Modifies tool parameter(s):\n> - samtools fastq: `-f 4`", + "description": "Specify to generate FASTQ files containing only unmapped reads from the aligner generated BAM files.", + "help_text": "Specify to turn on the generation and saving of FASTQs of only the unmapped reads from the mapping step in the results directory.\n\nThis can be useful if you wish to do other analysis of the unmapped reads independently of the pipeline.\n\nNote: the reads in these FASTQ files have _not_ undergone length or quality filtering\n\n> Modifies samtools fastq parameter: `-f 4`", "fa_icon": "fas fa-file-alt" }, "bamfiltering_generatemappedfastq": { "type": "boolean", - "description": "Generate FASTQ files containing only mapped reads from the aligner generated BAM files .", - "help_text": "This turns on the generation and saving of FASTQs of only the mapped reads from the mapping step in the results directory, using `samtools fastq`.\n\nThis could be useful if you wish to do other analysis of the mapped reads independently of the pipeline, such as remapping with different parameters (whereby only including mapped reads will speed up 
computation time during the re-mapping due to reduced input data).\n\nNote the reads in these FASTQ files have _not_ undergone length of quality filtering\n\n> Modifies tool parameter(s):\n> - samtools fastq: `-F 4`", + "description": "Specify to generate FASTQ files containing only mapped reads from the aligner generated BAM files.", + "help_text": "Specify to turn on the generation and saving of FASTQs of only the mapped reads from the mapping step in the results directory.\n\nThis can be useful if you wish to do other analysis of the mapped reads independently of the pipeline, such as remapping with different parameters (whereby only including mapped reads will speed up computation time during the re-mapping due to reduced input data).\n\nNote the reads in these FASTQ files have _not_ undergone length or quality filtering\n\n> Modifies samtools fastq parameter: `-F 4`", "fa_icon": "far fa-file-alt" }, "bamfiltering_savefilteredbams": { "type": "boolean", - "description": "Save in the results directory the intermediate filtered genomic BAM files that are sent for downstream genomic analyses.", - "help_text": "This saves intermediate length and/or quality filtered genomic BAM files in the results directory.", - "fa_icon": "far fa-save" + "description": "Specify to save the intermediate filtered genomic BAM files in the results directory.", + "help_text": "Specify to save intermediate length- and/or quality-filtered genomic BAM files in the results directory.", + "fa_icon": "fas fa-save" } }, "fa_icon": "fas fa-filter" @@ -670,64 +677,64 @@ "metagenomics": { "title": "Metagenomics", "type": "object", - "description": "Options to related to metagenomic screening.", + "description": "Options related to metagenomic screening.", "default": "", "properties": { "run_metagenomicscreening": { "type": "boolean", - "description": "Turn on metagenomic screening of mapped, unmapped, or all reads.", + "description": "Specify to turn on metagenomic screening of mapped, unmapped 
or all reads.", "fa_icon": "fas fa-power-off", - "help_text": "Turns on the metagenomic screening subworkflow of the pipeline, where reads are screened against large databases. Typically used for pathogen screening or microbial community analysis.\n\nIf supplied, this will also turn on the BAM filtering subworkflow of the pipeline." + "help_text": "Specify to turn on the metagenomic screening subworkflow of the pipeline, where reads are screened against large databases. Typically used for pathogen screening or microbial community analysis.\n\nIf supplied, this will also turn on the BAM filtering subworkflow of the pipeline." }, "metagenomicscreening_input": { "type": "string", "default": "unmapped", - "description": "Specify which type of reads to go into metagenomic screening.", + "description": "Specify which type of reads to use for metagenomic screening.", "enum": ["unmapped", "mapped", "all"], "fa_icon": "fas fa-hand-pointer", - "help_text": "You can select which reads coming out of the read alignment step will be sent for metagenomic analysis.\n\nThis influences which reads are sent to this step, whether you want unmapped reads (used in most cases, as 'host reads' can often be contaminants in microbial genomes), mapped reads (e.g, when doing competitive against a genomic reference of multiple genomes and which to apply LCA correction), or all reads.\n\n> \u26a0\ufe0f If you skip paired-end merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. 
This merged file is _not_ saved in results directory.\n\n> Modifies tool parameter(s):\n> - samtools fastq: `-f 4` / `-F 4`" + "help_text": "Specify to select which mapped reads will be sent for metagenomic analysis.\n\nThis influences which reads are sent to this step, whether you want unmapped reads (used in most cases, as 'host reads' can often be contaminants in microbial genomes), mapped reads (e.g, when doing competitive against a genomic reference of multiple genomes and which to apply LCA correction) or all reads.\n\n> ⚠️ If you skip paired-end merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies samtools fastq parameters: `-f 4` / `-F 4`" }, "run_metagenomics_complexityfiltering": { "type": "boolean", "fa_icon": "fas fa-power-off", - "help_text": "Turns on a subworkflow of the pipeline that filters the fastq-files for complexity before the metagenomics profiling\nUse the metagenomics_complexity_tool parameter to select a method", - "description": "Run a complexity filter on the metagenomics input files before classification. Specifiy the tool to use with the `metagenomics_complexity_tool` parameter, save with `metagenomics_complexity_savefastq`" + "help_text": "Specify to turn on a subworkflow of the pipeline that filters the FASTQ files for complexity before the metagenomics profiling.\nUse the metagenomics_complexity_tool parameter to select a method.", + "description": "Specify to run a complexity filter on the metagenomics input files before classification." 
}, "metagenomics_complexity_savefastq": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Save FASTQ files containing the complexity filtered reads (before metagenomic classification).", - "help_text": "Save the complexity-filtered fastq-files to the results directory" + "description": "Specify to save FASTQ files containing the complexity-filtered reads before metagenomic classification.", + "help_text": "Specify to save the complexity-filtered FASTQ files to the results directory." }, "metagenomics_complexity_tool": { "type": "string", "default": "bbduk", - "description": "Specify which tool to use for trimming, filtering, or reformatting of fastq reads that go into metagenomics screening.", + "description": "Specify which tool to use for trimming, filtering or reformatting of FASTQ reads that go into metagenomics screening.", "enum": ["bbduk", "prinseq"], - "fa_icon": "fas fa-hand-pointer", - "help_text": "You can select which tool is used to generate a final set of reads for the metagenomic classifier after any necessary trimming, filtering or reformatting of the reads.\n\nThis intermediate file is not saved in the results directory, unless marked with `--metagenomics_complexity_savefastq`." + "fa_icon": "fas fa-hammer", + "help_text": "Specify to select which tool is used to generate a final set of reads for the metagenomic classifier after any necessary trimming, filtering or reformatting of the reads.\n\nThis intermediate file is not saved in the results directory unless marked with `--metagenomics_complexity_savefastq`." }, "metagenomics_complexity_entropy": { "type": "number", "fa_icon": "fas fa-sort-numeric-up", - "description": "Specify the entropy threshold that under which a sequencing read will be complexity filtered out. 
This should be between 0-1.", + "description": "Specify the entropy threshold under which a sequencing read will be complexity-filtered out.", "default": 0.3, - "help_text": "Specify the minimum 'entropy' value for complexity filtering for the BBDuk or PRINSEQ++ tools.\n\nThis value will only be used for PRINSEQ++ if `--metagenomics_prinseq_mode` is set to `entropy`.\n\nEntropy here corresponds to the amount of sequence variation exists within the read. Higher values correspond to more variety, and thus will likely result in more specific matching to a taxon's reference genome. The trade off here is fewer reads (or abundance information) available for having a confident identification.\n\n> Modifies tool parameter(s):\n> - BBDuk: `entropy=`\n> - PRINSEQ++: `-lc_entropy`\n\n" + "help_text": "Specify the minimum 'entropy' value for complexity filtering for the BBDuk or PRINSEQ++ tools.\n\nThis value will only be used for PRINSEQ++ if `--metagenomics_prinseq_mode` is set to `entropy`.\n\nEntropy here corresponds to the amount of sequence variation existing within the read. Higher values correspond to more variety and thus will likely result in more specific matching to a taxon's reference genome. 
The trade-off here is fewer reads (or abundance information) available for having a confident identification.\n\n> Modifies parameters:\n> - BBDuk: `entropy=`\n> - PRINSEQ++: `-lc_entropy`" }, "metagenomics_prinseq_mode": { "type": "string", "default": "entropy", "enum": ["entropy", "dust"], - "fa_icon": "fas fa-check-square", - "description": "Specify the complexity filter mode for PRINSEQ++", - "help_text": "Specify the complexity filter mode for PRINSEQ++ \n\nUse the selected mode together with the correct flag:\n'dust' requires the `--metagenomics_prinseq_dustscore` parameter set\n'entropy' requires the `--metagenomics_complexity_entropy` parameter set\n\n> Sets one of the tool parameter(s):\n> - PRINSEQ++: `-lc_entropy`\n> - PRINSEQ++: `-lc_dust`" + "fa_icon": "fas fa-toggle-on", + "description": "Specify the complexity filter mode for PRINSEQ++.", + "help_text": "Specify the complexity filter mode for PRINSEQ++.\n\nUse the selected mode together with the correct flag:\n'dust' requires the `--metagenomics_prinseq_dustscore` parameter set\n'entropy' requires the `--metagenomics_complexity_entropy` parameter set\n\n> Modifies parameters:\n> - PRINSEQ++: `-lc_entropy`\n> - PRINSEQ++: `-lc_dust`" }, "metagenomics_prinseq_dustscore": { "type": "number", "default": 0.5, - "fa_icon": "fas fa-head-side-mask", - "description": "Specify the minimum dust score for PRINTSEQ++ complexity filtering", - "help_text": "Specify the minimum dust score below which low-complexity reads will be removed. A DUST score is based on how often different tri-nucleotides occur along a read.\n\n> Modifies tool parameter(s):\n> - PRINSEQ++: `--lc_dust`" + "fa_icon": "fas fa-filter", + "description": "Specify the minimum dust score for PRINSEQ++ complexity filtering.", + "help_text": "Specify the minimum dust score below which low-complexity reads will be removed. 
A dust score is based on how often different tri-nucleotides occur along a read.\n\n> Modifies PRINSEQ++ parameter: `--lc_dust`" } }, "fa_icon": "fas fa-search" @@ -747,9 +754,9 @@ "type": "string", "default": "markduplicates", "description": "Specify which tool to use for deduplication.", - "help_text": "Sets the duplicate read removal tool. Alternatively an ancient DNA specific read deduplication tool `dedup` (Peltzer et al. 2016) is offered. The latter utilises both ends of paired-end data to remove duplicates (i.e. true exact duplicates, as markduplicates will over-zealously deduplicate anything with the same starting position even if the ends are different).\n\n> \u26a0\ufe0f DeDup can only be used on collapsed (i.e. merged) reads from paired-end sequencing.", + "help_text": "Specify which duplicate read removal tool to use. While `markduplicates` is set by default, an ancient DNA specific read deduplication tool `dedup` is offered (see [Peltzer et al. 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). The latter utilises both ends of paired-end data to remove duplicates (i.e. true exact duplicates, as markduplicates will over-zealously deduplicate anything with the same starting position even if the ends are different).\n\n> ⚠️ DeDup can only be used on collapsed (i.e. merged) reads from paired-end sequencing.", "enum": ["markduplicates", "dedup"], - "fa_icon": "fas fa-layer-group" + "fa_icon": "fas fa-hammer" } }, "fa_icon": "fas fa-clone" @@ -763,127 +770,127 @@ "properties": { "run_mapdamage_rescaling": { "type": "boolean", - "fa_icon": "fas fa-map", - "description": "Turn on damage rescaling of BAM files using mapDamage2 to probabilistically remove damage.", - "help_text": "Turns on mapDamage2's BAM rescaling functionality. This probabilistically replaces Ts back to Cs depending on the likelihood this reference-mismatch was originally caused by damage. 
If the library is specified to be single stranded, this will automatically use the --single-stranded mode.\nThis process will ameliorate the effects of aDNA damage, but also increase reference-bias. \n\n**This functionality does not have any MultiQC output.**\nwarning: rescaled libraries will not be merged with non-scaled libraries of the same sample for downstream genotyping, as the model may be different for each library. If you wish to merge these, please do this manually and re-run nf-core/eager using the merged BAMs as input.\n\n> Modifies the `--rescale` parameter of mapDamage2" + "fa_icon": "fas fa-power-off", + "description": "Specify to turn on damage rescaling of BAM files using mapDamage2 to probabilistically remove damage.", + "help_text": "Specify to turn on mapDamage2's BAM rescaling functionality. This probabilistically replaces Ts back to Cs depending on the likelihood this reference-mismatch was originally caused by damage. If the library is specified to be single-stranded, this will automatically use the `--single-stranded` mode.\nThis process will ameliorate the effects of aDNA damage, but also increase reference-bias.\n\n**This functionality does not have any MultiQC output.**\n ⚠️ Rescaled libraries will not be merged with non-scaled libraries of the same sample for downstream genotyping, as the model may be different for each library. If you wish to merge these, please do this manually and re-run nf-core/eager using the merged BAMs as input.\n\n> Modifies mapDamage2 parameter: `--rescale`" }, "damage_manipulation_rescale_seqlength": { "type": "integer", "default": 12, - "description": "Length of read sequence to use from each side for rescaling. 
Can be overridden by `--rescalelength*p`.", - "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale at both ends.\n\n> Modifies the `--seq-length` parameter of mapDamage2.", + "description": "Specify the length of read sequence to use from each side for rescaling.", + "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale at both ends. This can be overridden by `--rescalelength*p`.\n\n> Modifies mapDamage2 parameter: `--seq-length`", "fa_icon": "fas fa-ruler-horizontal" }, "damage_manipulation_rescale_length_5p": { "type": "integer", "default": 0, - "description": "Length of read for mapDamage2 to rescale from 5p end. Only used if not 0, otherwise `--rescale_seqlength` used.", - "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. Overrides `--rescale_seqlength`.\n\n> Modifies the `--rescale-length-5p` parameter of mapDamage2.", + "description": "Specify the length of read for mapDamage2 to rescale from 5 prime end.", + "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. This overrides `--rescale_seqlength`.\n\n> Modifies mapDamage2 parameter: `--rescale-length-5p`", "fa_icon": "fas fa-balance-scale-right" }, "damage_manipulation_rescale_length_3p": { "type": "integer", "default": 0, - "description": "Length of read for mapDamage2 to rescale from 3p end. Only used if not 0 otherwise `--rescale_seqlength` used.", - "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. Overrides `--rescale_seqlength`.\n\n> Modifies the `--rescale-length-3p` parameter of mapDamage2.", + "description": "Specify the length of read for mapDamage2 to rescale from 3 prime end.", + "help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. 
This overrides `--rescale_seqlength`.\n\n> Modifies mapDamage2 parameter `--rescale-length-3p`", "fa_icon": "fas fa-balance-scale-left" }, "run_pmd_filtering": { "type": "boolean", - "description": "Turn on PMDtools filtering.", - "help_text": "Specifies to run PMDtools for damage based read filtering in sequencing libraries.", + "description": "Specify to turn on PMDtools filtering.", + "help_text": "Specify to run PMDtools for damage-based read filtering in sequencing libraries.", "fa_icon": "fas fa-power-off" }, "damage_manipulation_pmdtools_threshold": { "type": "integer", "default": 3, "fa_icon": "far fa-chart-bar", - "description": "Specify PMDScore threshold for PMDtools.", - "help_text": "Specifies the PMDScore threshold to use in the pipeline when filtering BAM files for DNA damage. Only reads which surpass this damage score are considered for downstream DNA analysis.\n\n> Modifies PMDtools parameter: `--threshold`" + "description": "Specify PMD score threshold for PMDtools.", + "help_text": "Specify the PMDScore threshold to use when filtering BAM files for DNA damage. Only reads which surpass this damage score are considered for downstream analysis.\n\n> Modifies PMDtools parameter: `--threshold`" }, "damage_manipulation_pmdtools_masked_reference": { "type": "string", "fa_icon": "fas fa-mask", - "help_text": "Supplying a FASTA file will use this file as reference for `samtools calmd` prior to PMD filtering. 
/nSetting the SNPs that are part of the used capture set as `N` can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a SNP to be counted as damage when it is a transition.", - "description": "Specify a masked FASTA file with positions to be used with pmdtools.", + "help_text": "Specify a FASTA file to use as reference for `samtools calmd` prior to PMD filtering.\nSetting the SNPs that are part of the used capture set as `N` can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a SNP to be counted as damage when it is a transition.", + "description": "Specify a masked FASTA file with positions to be used with PMDtools.", "pattern": "^\\S+\\.fa?(\\sta)$", "format": "file-path" }, "damage_manipulation_pmdtools_reference_mask": { "type": "string", "fa_icon": "fas fa-mask", - "help_text": "Supplying a bedfile to this parameter activates masking of the reference fasta at the contained sites prior to running PMDtools. Positions that are in the provided bedfile will be replaced by Ns in the reference genome. \nThis can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a transition SNP to be counted as damage. Masking of the reference is done using `bedtools maskfasta`.", - "description": "Specify a bedfile to be used to mask the reference fasta prior to running pmdtools.", + "help_text": "Specify a BED file to activate masking of the reference FASTA at the contained sites prior to running PMDtools. Positions that are in the provided BED file will be replaced by Ns in the reference genome.\nThis can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a transition SNP to be counted as damage. 
Masking of the reference is done using `bedtools maskfasta`.", + "description": "Specify a BED file to be used to mask the reference FASTA prior to running PMDtools.", "pattern": "^\\S+\\.bed?(\\.gz)$", "format": "file-path" }, "run_trim_bam": { "type": "boolean", - "fa_icon": "fas fa-eraser", - "description": "Turn on BAM trimming. Will only affect non-UDG or half-UDG libraries.", - "help_text": "Turns on the BAM trimming method. Trims off [n] bases from reads in the deduplicated BAM file. Damage assessment in PMDtools or DamageProfiler remains untouched, as data is routed through this independently. BAM trimming is typically performed to reduce errors during genotyping that can be caused by aDNA damage.\n\nBAM trimming will only affect libraries with 'damage_treatment' of 'none' or 'half'. Complete UDG treatment ('full') should have removed all damage, during library construction so trimming of 0 bp is performed. The amount of bases that will be trimmed off from each side of the molecule should be set separately for libraries with depending on their 'strandedness' and 'damage_treatment'.\n\n> Note: additional artefacts such as bar-codes or adapters should be removed prior to mapping and not in this step." + "fa_icon": "fas fa-power-off", + "description": "Specify to turn on BAM trimming for non-UDG or half-UDG libraries.", + "help_text": "Specify to turn on the BAM trimming of [n] bases from reads in the deduplicated BAM file. Damage assessment in PMDtools or DamageProfiler remains untouched, as data is routed through this independently. BAM trimming is typically performed to reduce errors during genotyping that can be caused by aDNA damage.\n\nBAM trimming will only affect libraries with 'damage_treatment' of 'none' or 'half'. Complete UDG treatment ('full') should have removed all damage during library construction, so trimming of 0 bp is performed. 
The number of bases that will be trimmed off from each side of the molecule should be set separately for libraries depending on their 'strandedness' and 'damage_treatment'.\n\n> Note: additional artefacts such as barcodes or adapters should be removed prior to mapping and not in this step." }, "damage_manipulation_bamutils_trim_double_stranded_none_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler-combined", - "description": "Specify the number of bases to clip off reads from 'left' end of read for double-stranded non-UDG libraries.", - "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`" + "fa_icon": "fas fa-cut", + "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of reads for double-stranded non-UDG libraries.", + "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of reads for double-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'none'. 
Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`" }, "damage_manipulation_bamutils_trim_double_stranded_none_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler", - "description": "Specify the number of bases to clip off reads from 'right' end of read for double-stranded non-UDG libraries.", - "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-cut", + "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of reads for double-stranded non-UDG libraries.", + "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of reads for double-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_trim_double_stranded_half_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler-combined", - "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'half'. 
Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", - "description": "Specify the number of bases to clip off reads from 'left' end of read for double-stranded half-UDG libraries." + "fa_icon": "fas fa-cut", + "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for double-stranded half-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", + "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for double-stranded half-UDG libraries." }, "damage_manipulation_bamutils_trim_double_stranded_half_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler", - "description": "Specify the number of bases to clip off reads from 'right' end of read for double-stranded half-UDG libraries.", - "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-cut", + "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for double-stranded half-UDG libraries.", + "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for double-stranded half-UDG libraries. 
By default, this is set to 0, and therefore clips off no bases on the right side of reads from double-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_trim_single_stranded_none_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler-combined", - "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", - "description": "Specify the number of bases to clip off reads from 'left' end of read for single-stranded non-UDG libraries." + "fa_icon": "fas fa-cut", + "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", + "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded non-UDG libraries." 
}, "damage_manipulation_bamutils_trim_single_stranded_none_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler", - "description": "Specify the number of bases to clip off reads from 'right' end of read for single-stranded non-UDG libraries.", - "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-cut", + "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded non-UDG libraries.", + "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_trim_single_stranded_half_udg_left": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler-combined", - "help_text": "Default is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", - "description": "Specify the number of bases to clip off reads from 'left' end of read for single-stranded half-UDG libraries." 
+ "fa_icon": "fas fa-cut", + "help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded half-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from single-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`", + "description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of read for single-stranded half-UDG libraries." }, "damage_manipulation_bamutils_trim_single_stranded_half_udg_right": { "type": "integer", "default": 0, - "fa_icon": "fas fa-ruler", - "description": "Specify the number of bases to clip off reads from 'right' end of read for single-stranded half-UDG libraries.", - "help_text": "Default is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'half'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" + "fa_icon": "fas fa-cut", + "description": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded half-UDG libraries.", + "help_text": "Specify the number of bases to clip off reads from 'right' (3 prime) end of read for single-stranded half-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the right side of reads from single-stranded libraries whose UDG treatment is set to 'half'. 
Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-R`" }, "damage_manipulation_bamutils_softclip": { "type": "boolean", - "fa_icon": "fas fa-paint-roller", - "description": "Turn on using soft-trimming instead of hard masking.", - "help_text": "By default, nf-core/eager uses hard trimming, which sets trimmed bases to 'N' with quality '!' in the BAM output. Turn this on to use soft-trimming instead, which masks reads at the read ends using the CIGAR string instead.\n\n> Modifies bam trimBam parameter: `-c`" + "fa_icon": "fas fa-mask", + "description": "Specify to turn on soft-trimming instead of hard masking.", + "help_text": "Specify to turn on soft-trimming instead of hard masking of bases. By default, nf-core/eager uses hard trimming, which sets trimmed bases to 'N' with quality '!' in the BAM output. Turn this on to use soft-trimming instead, which masks reads at the read ends using the CIGAR string instead.\n\n> Modifies bamUtil's trimBam parameter: `-c`" } } }, @@ -896,28 +903,28 @@ "run_genotyping": { "type": "boolean", "fa_icon": "fas fa-power-off", - "description": "Turn on genotyping of BAM files.", - "help_text": "Turns on genotyping. `--genotyping_source` and `--genotyping_tool` must also be provided together with this option." + "description": "Specify to turn on genotyping of BAM files.", + "help_text": "Specify to turn on genotyping. `--genotyping_source` and `--genotyping_tool` must also be provided together with this option." }, "genotyping_source": { "type": "string", "description": "Specify which input BAM to use for genotyping.", - "help_text": "Indicates which BAM file to use for genotyping, depending on what BAM processing modules you have turned on. Options are: 'raw' (to use the reads used as input for damage manipulation); 'pmd' (for pmdtools output); 'trimmed' (for base-clipped BAMs. 
Base-clipped-PMD-filtered BAMs if both filtering and trimming are requested); 'rescaled' (for mapDamage2 rescaling output).\nWarning: Depending on the parameters you provided, 'raw' can refer to all mapped reads, filtered reads (if bam filtering has been performed), or the deduplicated reads (if deduplication was performed).", + "help_text": "Specify which BAM file to use for genotyping, depending on what BAM processing modules you have turned on. Options are: 'raw' (to use the reads used as input for damage manipulation); 'pmd' (for pmdtools output); 'trimmed' (for base-clipped BAMs. Base-clipped-PMD-filtered BAMs if both filtering and trimming are requested); 'rescaled' (for mapDamage2 rescaling output).\nWarning: Depending on the parameters you provided, 'raw' can refer to all mapped reads, filtered reads (if BAM filtering has been performed), or the deduplicated reads (if deduplication was performed).", "fa_icon": "fas fa-faucet", "enum": ["raw", "pmd", "trimmed", "rescaled"] }, "genotyping_tool": { "type": "string", - "fa_icon": "fas fa-tools", + "fa_icon": "fas fa-hammer", "enum": ["ug", "hc", "freebayes", "pileupcaller", "angsd"], - "help_text": "Specifies which genotyper to use. Current options are: GATK (v3.5) UnifiedGenotyper or GATK Haplotype Caller (v4); and the FreeBayes Caller.\n\n> Note that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does de novo assembly around each variant site), be aware GATK 3.5 it is officially deprecated by the Broad Institute (but is used here for compatibility with MultiVCFAnalyzer).", - "description": "Specify which genotyper to use between: GATK UnifiedGenotyper, GATK HaplotypeCaller, Freebayes, or pileupCaller." + "help_text": "Specify which genotyper to use. 
Current options are: pileupCaller, ANGSD, GATK UnifiedGenotyper (v3.5), GATK HaplotypeCaller (v4) or FreeBayes.\n\n> Note that while UnifiedGenotyper is more suitable for low-coverage ancient DNA (HaplotypeCaller does de novo assembly around each variant site), be aware that GATK v3.5 is officially deprecated by the Broad Institute (but is used here for compatibility with MultiVCFAnalyzer).", + "description": "Specify which genotyper to use." }, "skip_bcftools_stats": { "type": "boolean", - "fa_icon": "far fa-chart-bar", - "description": "Skip bcftools stats generation for VCF based variant calling statistics", - "help_text": "Disables running of `bcftools stats` against VCF files from GATK and FreeBayes genotypers.\n\nIf ran, `bcftools stats` will automatically include the FASTA reference for INDEL-related statistics." + "fa_icon": "fas fa-forward", + "description": "Specify to skip generation of VCF-based variant calling statistics with bcftools.", + "help_text": "Specify to disable running of `bcftools stats` against VCF files from GATK and FreeBayes genotypers.\n\nWhen run, `bcftools stats` will automatically include the FASTA reference for INDEL-related statistics." }, "genotyping_reference_ploidy": { "type": "integer", @@ -929,27 +936,27 @@ "genotyping_pileupcaller_min_base_quality": { "type": "integer", "default": 30, - "description": "The base mapping quality to be used for genotyping with pileupcaller.", - "help_text": "The minimum base quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller. 
\n\n> Modifies samtools mpileup parameter: `-Q`.", + "description": "Specify the minimum base quality to be used for genotyping with pileupCaller.", + "help_text": "Specify the minimum base quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller.\n\n> Modifies samtools mpileup parameter: `-Q`.", "fa_icon": "fas fa-filter" }, "genotyping_pileupcaller_min_map_quality": { "type": "integer", "default": 30, "fa_icon": "fas fa-filter", - "description": "The minimum mapping quality to be used for genotyping with pileupcaller.", - "help_text": "The minimum mapping quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller. \n\n> Modifies samtools mpileup parameter: `-q`." + "description": "Specify the minimum mapping quality to be used for genotyping with pileupCaller.", + "help_text": "Specify the minimum mapping quality to be used when generating the samtools mpileup used as input for genotyping with pileupCaller.\n\n> Modifies samtools mpileup parameter: `-q`." }, "genotyping_pileupcaller_bedfile": { "type": "string", "fa_icon": "fas fa-bed", - "help_text": "Specify a SNP panel in the form of a bed file of sites at which to generate a pileup for pileupCaller.", + "help_text": "Specify a SNP panel in the form of a BED file of sites at which to generate a pileup for pileupCaller.", "format": "file-path", - "description": "Specify the path to SNP panel in bed format for pileupCaller." + "description": "Specify the path to SNP panel in BED format for pileupCaller." 
}, "genotyping_pileupcaller_snpfile": { "type": "string", - "help_text": "Specify a SNP panel in [EIGENSTRAT](https://github.com/DReichLab/EIG/blob/master/CONVERTF/README) format, pileupCaller will call these sites.", + "help_text": "Specify a SNP panel in [EIGENSTRAT](https://github.com/DReichLab/EIG/blob/master/CONVERTF/README) format of sites to be called with pileupCaller.", "fa_icon": "fas fa-sliders-h", "format": "file-path", "description": "Specify the path to SNP panel in EIGENSTRAT format for pileupCaller." @@ -958,15 +965,15 @@ "type": "string", "default": "randomHaploid", "fa_icon": "fas fa-toolbox", - "description": "Specify the SNP calling method to use for genotyping.", - "help_text": "Specify the SNP calling method to use for genotyping. 'randomHaploid' will randomly sample a read overlapping the SNP, and produce a homozygous genotype with the allele supported by that read (often called 'pseudohaploid' or 'pseudodiploid'). 'randomDiploid` will randomly sample two reads overlapping the SNP and produce a genotype comprised of the two alleles supported by the two reads. 'majorityCall' will produce a genotype that is homozygous for the allele that appears in the majority of reads overlapping the SNP.\n\n> Modifies pileupCaller parameters: `--randomHaploid` `--randomDiploid` `--majorityCall`", + "description": "Specify the SNP calling method to use for genotyping with pileupCaller.", + "help_text": "Specify the SNP calling method to use for genotyping. 'randomHaploid' will randomly sample a read overlapping the SNP and produce a homozygous genotype with the allele supported by that read (often called 'pseudohaploid' or 'pseudodiploid'). 'randomDiploid' will randomly sample two reads overlapping the SNP and produce a genotype comprised of the two alleles supported by the two reads. 
'majorityCall' will produce a genotype that is homozygous for the allele that appears in the majority of reads overlapping the SNP.\n\n> Modifies pileupCaller parameters: `--randomHaploid` `--randomDiploid` `--majorityCall`", "enum": ["randomHaploid", "randomDiploid", "majorityCall"] }, "genotyping_pileupcaller_transitions_mode": { "type": "string", "default": "AllSites", - "description": "Specify the calling mode for transitions.", - "help_text": "Specify if genotypes of transition SNPs should be called, set to missing, or excluded from the genotypes respectively. \n\n> Modifies pileupCaller parameter: `--skipTransitions` `--transitionsMissing`", + "description": "Specify the calling mode for transitions with pileupCaller.", + "help_text": "Specify if genotypes of transition SNPs should be called, set to missing, or excluded from the genotypes respectively.\n\n> Modifies pileupCaller parameter: `--skipTransitions` `--transitionsMissing`", "enum": ["AllSites", "TransitionsMissing", "SkipTransitions"], "fa_icon": "fas fa-toggle-on" }, @@ -975,13 +982,13 @@ "default": 30, "fa_icon": "fas fa-balance-scale-right", "description": "Specify GATK phred-scaled confidence threshold.", - "help_text": "If selected, specify a GATK genotyper phred-scaled confidence threshold of a given SNP/INDEL call.\n\n> Modifies GATK UnifiedGenotyper or HaplotypeCaller parameter: `-stand_call_conf`" + "help_text": "Specify a GATK genotyper phred-scaled confidence threshold of a given SNP/INDEL call.\n\n> Modifies GATK UnifiedGenotyper or HaplotypeCaller parameter: `-stand_call_conf`" }, "genotyping_gatk_dbsnp": { "type": "string", - "help_text": "(Optional) Specify VCF file for output VCF SNP annotation e.g. if you want to annotate your VCF file with 'rs' SNP IDs. Check GATK documentation for more information. Gzip not accepted.", + "help_text": "Specify VCF file for output VCF SNP annotation, e.g. if you want to annotate your VCF file with 'rs' SNP IDs. 
Check GATK documentation for more information. Gzip not accepted.", "fa_icon": "fas fa-pen-alt", - "description": "Specify VCF file for SNP annotation of output VCF files. Optional. Gzip not accepted.", + "description": "Specify VCF file for SNP annotation of output VCF files for GATK.", "pattern": "^\\S+\\.vcf$", "format": "file-path", "mimetype": "VCF" @@ -990,16 +997,16 @@ "type": "integer", "default": 250, "fa_icon": "fas fa-icicles", - "description": "Maximum depth coverage allowed for genotyping before down-sampling is turned on.", - "help_text": "Maximum depth coverage allowed for genotyping before down-sampling is turned on. Any position with a coverage higher than this value will be randomly down-sampled to this many reads.\n\n> Modifies GATK UnifiedGenotyper parameter: `-dcov`" + "description": "Specify the maximum depth coverage allowed for genotyping with GATK before down-sampling is turned on.", + "help_text": "Specify the maximum depth coverage allowed for genotyping before down-sampling is turned on. Any position with a coverage higher than this value will be randomly down-sampled to this many reads.\n\n> Modifies GATK UnifiedGenotyper parameter: `-dcov`" }, "genotyping_gatk_ug_out_mode": { "type": "string", "default": "EMIT_VARIANTS_ONLY", - "description": "Specify GATK output mode.", + "description": "Specify GATK UnifiedGenotyper output mode.", "enum": ["EMIT_VARIANTS_ONLY", "EMIT_ALL_CONFIDENT_SITES", "EMIT_ALL_SITES"], - "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, this defines the output mode to use when producing the output VCF (i.e. produce calls for every site or just confidence sites.)\n\n> Modifies GATK UnifiedGenotyper parameter: `--output_mode`", - "fa_icon": "fas fa-bullhorn" + "help_text": "Specify GATK UnifiedGenotyper output mode to use when producing the output VCF (i.e. 
produce calls for every site or just confidence sites.)\n\n> Modifies GATK UnifiedGenotyper parameter: `--output_mode`", + "fa_icon": "fas fa-toggle-on" }, "genotyping_gatk_ug_genotype_mode": { "type": "string", @@ -1007,49 +1014,49 @@ "description": "Specify UnifiedGenotyper likelihood model.", "enum": ["SNP", "INDEL", "BOTH", "GENERALPLOIDYSNP", "GENERALPLOIDYINDEL"], "fa_icon": "fas fa-project-diagram", - "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, this sets which likelihood model to follow, i.e. whether to call only SNPs or INDELS etc.\n\n> Modifies GATK UnifiedGenotyper parameter: `--genotype_likelihoods_model`" + "help_text": "Specify GATK UnifiedGenotyper likelihood model, i.e. whether to call only SNPs or INDELS etc.\n\n> Modifies GATK UnifiedGenotyper parameter: `--genotype_likelihoods_model`" }, "genotyping_gatk_ug_keeprealignbam": { "type": "boolean", - "fa_icon": "far fa-save", + "fa_icon": "fas fa-save", "description": "Specify to keep the BAM output of re-alignment around variants from GATK UnifiedGenotyper.", - "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, providing this parameter will output the BAMs that have realigned reads (with GATK's (v3) IndelRealigner) around possible variants for improved genotyping in addition to the standard VCF output.\n\nThese BAMs will be stored in the same folder as the corresponding VCF files." + "help_text": "Specify to output the BAMs that have realigned reads (with GATK (v3) IndelRealigner) around possible variants for improved genotyping with GATK UnifiedGenotyper in addition to the standard VCF output.\n\nThese BAMs will be stored in the same folder as the corresponding VCF files." }, "genotyping_gatk_ug_defaultbasequalities": { "type": "integer", "default": -1, - "description": "Supply a default base quality if a read is missing a base quality score. 
Setting to -1 turns this off.", - "help_text": "If GATK UnifiedGenotyper is selected as the genotyping tool, specify a value to set base quality scores, if reads are missing this information. Might be useful if you have 'synthetically' generated reads (e.g. chopping up a reference genome). Default is set to `-1` which is to not set any default quality (turned off). \n\n> Modifies GATK UnifiedGenotyper parameter: `--defaultBaseQualities`", + "description": "Specify to supply a default base quality if a read is missing a base quality score.", + "help_text": "Specify a value to set base quality scores for genotyping with GATK UnifiedGenotyper, if reads are missing this information. Might be useful if you have 'synthetically' generated reads (e.g. chopping up a reference genome). Default is set to `-1` which is to not set any default quality (turned off).\n\n> Modifies GATK UnifiedGenotyper parameter: `--defaultBaseQualities`", "fa_icon": "fas fa-redo-alt" }, "genotyping_gatk_hc_out_mode": { "type": "string", "default": "EMIT_VARIANTS_ONLY", - "fa_icon": "fas fa-bullhorn", - "description": "Specify GATK output mode.", - "help_text": "If GATK HaplotypeCaller is selected as the genotyping tool, this sets the type of sites that should be included in the output VCF (i.e. produce calls for every site or just confidence sites). \n\n> Modifies GATK HaplotypeCaller parameter: `--output_mode`", + "fa_icon": "fas fa-toggle-on", + "description": "Specify GATK HaplotypeCaller output mode.", + "help_text": "Specify the type of sites that should be included in the output VCF after genotyping with GATK HaplotypeCaller (i.e. 
produce calls for every site or just confidence sites).\n\n> Modifies GATK HaplotypeCaller parameter: `--output_mode`", "enum": ["EMIT_VARIANTS_ONLY", "EMIT_ALL_CONFIDENT_SITES", "EMIT_ALL_ACTIVE_SITES"] }, "genotyping_gatk_hc_emitrefconf": { "type": "string", "default": "GVCF", - "fa_icon": "fas fa-bullhorn", + "fa_icon": "fas fa-toggle-on", "description": "Specify HaplotypeCaller mode for emitting reference confidence calls.", - "help_text": "If GATK HaplotypeCaller is selected as the genotyping tool, this sets the mode for emitting reference confidence calls.\n\n> Modifies GATK HaplotypeCaller parameter: `--emit-ref-confidence`", + "help_text": "Specify GATK HaplotypeCaller mode for emitting reference confidence calls.\n\n> Modifies GATK HaplotypeCaller parameter: `--emit-ref-confidence`", "enum": ["NONE", "BP_RESOLUTION", "GVCF"] }, "genotyping_freebayes_min_alternate_count": { "type": "integer", "default": 1, - "description": "Specify minimum required supporting observations of an alternate allele to consider a variant.", - "help_text": "Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position.\n\n> Modifies freebayes parameter: `-C`", - "fa_icon": "fas fa-align-center" + "description": "Specify minimum required supporting observations of an alternate allele to consider a variant in FreeBayes.", + "help_text": "Specify the minimum count of observations supporting an alternate allele within a single individual in order to evaluate the position during genotyping with FreeBayes.\n\n> Modifies FreeBayes parameter: `-C`", + "fa_icon": "fas fa-filter" }, "genotyping_freebayes_skip_coverage": { "type": "integer", "default": 0, - "description": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than specified.", - "help_text": "Specify to skip over regions of high depth by discarding alignments overlapping positions 
where total read depth is greater than the specified value. Setting to 0 (the default) deactivates this behaviour.\n\n> Modifies freebayes parameter: `-g`", + "description": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than specified in FreeBayes.", + "help_text": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than the specified value during genotyping with FreeBayes. This is set to 0 by default, which deactivates this behaviour.\n\n> Modifies FreeBayes parameter: `-g`", "fa_icon": "fab fa-think-peaks" }, "genotyping_angsd_glmodel": { @@ -1057,7 +1064,7 @@ "default": "samtools", "fa_icon": "fas fa-project-diagram", "description": "Specify which ANGSD genotyping likelihood model to use.", - "help_text": "Specify which genotype likelihood model to use.\n\n> Modifies angsd parameter: `-GL`", + "help_text": "Specify which genotype likelihood model to use in ANGSD.\n\n> Modifies ANGSD parameter: `-GL`", "enum": ["samtools", "gatk", "soapsnp", "syk"] }, "genotyping_angsd_glformat": { @@ -1065,7 +1072,7 @@ "default": "binary", "fa_icon": "fas fa-text-height", "description": "Specify the formatting of the output VCF for ANGSD genotype likelihood results.", - "help_text": "Specifies what type of genotyping likelihood file format will be output.\n\nThe options refer to the following descriptions respectively:\n\n- `binary`: binary output of all 10 log genotype likelihood\n- `beagle_binary`: beagle likelihood file\n- `binary_three`: binary 3 times likelihood\n- `text`: text output of all 10 log genotype likelihoods.\n\nSee the [ANGSD documentation](http://www.popgen.dk/angsd/) for more information on which to select for your downstream applications.\n\n> Modifies angsd parameter: `-doGlf`", + "help_text": "Specifies what type of genotyping likelihood file format will be output by ANGSD.\n\nThe options refer to the 
following descriptions respectively:\n\n- `binary`: binary output of all 10 log genotype likelihood\n- `beagle_binary`: beagle likelihood file\n- `binary_three`: binary 3 times likelihood\n- `text`: text output of all 10 log genotype likelihoods.\n\nSee the [ANGSD documentation](http://www.popgen.dk/angsd/) for more information on which to select for your downstream applications.\n\n> Modifies ANGSD parameter: `-doGlf`", "enum": ["binary", "beagle_binary", "binary_three", "text"] } }, @@ -1082,15 +1089,15 @@ "properties": { "run_mtnucratio": { "type": "boolean", - "description": "Turn on mitochondrial to nuclear ratio calculation.", - "help_text": "Turn on the module to estimate the ratio of mitochondrial to nuclear reads.", - "fa_icon": "fas fa-balance-scale-left" + "description": "Specify to turn on mitochondrial to nuclear ratio calculation.", + "help_text": "Specify to turn on estimation of the ratio of mitochondrial to nuclear reads.", + "fa_icon": "fas fa-power-off" }, "mitochondrion_header": { "type": "string", "default": "MT", - "description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome (up to the first space).", - "help_text": "Specify the FASTA entry in the reference file specified as --fasta, which acts as the mitochondrial 'chromosome' to base the ratio calculation on. The tool only accepts the first section of the header before the first space. The default chromosome name is based on hs37d5/GrCH37 human reference genome.", + "description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome.", + "help_text": "Specify the FASTA entry in the reference file specified as `--fasta`, which acts as the mitochondrial 'chromosome' to base the ratio calculation on. The tool only accepts the first section of the header before the first space. 
The default chromosome name is based on hs37d5/GrCH37 human reference genome.", "fa_icon": "fas fa-heading" } } @@ -1098,19 +1105,19 @@ "mapping_statistics": { "title": "Mapping statistics", "type": "object", - "description": "", + "description": "Options for the calculation of mapping statistics", "default": "", "properties": { "mapstats_skip_preseq": { "type": "boolean", - "help_text": "Turns off the computation of library complexity estimation.", - "description": "Turns off the computation of library complexity estimation.", + "help_text": "Specify to turn off the computation of library complexity estimation.", + "description": "Specify to turn off the computation of library complexity estimation with preseq.", "fa_icon": "fas fa-forward" }, "mapstats_preseq_mode": { "type": "string", "default": "c_curve", - "help_text": "Specify which mode of preseq to run.\n\nFrom the [PreSeq documentation](http://smithlabresearch.org/wp-content/uploads/manual.pdf):\n\nc curve is used to compute the expected complexity curve of a mapped read file with a hypergeometric formula\n\nlc extrap is used to generate the expected yield for theoretical larger experiments and bounds on the number of distinct reads in the library and the associated confidence intervals, which is computed by bootstrapping the observed duplicate counts histogram.", + "help_text": "Specify which mode of preseq to run.\n\nFrom the [preseq documentation](http://smithlabresearch.org/wp-content/uploads/manual.pdf):\n\nc curve is used to compute the expected complexity curve of a mapped read file with a hypergeometric formula\n\nlc extrap is used to generate the expected yield for theoretical larger experiments and bounds on the number of distinct reads in the library and the associated confidence intervals, which is computed by bootstrapping the observed duplicate counts histogram.", "description": "Specify which mode of preseq to run.", "fa_icon": "fas fa-toggle-on", "enum": ["c_curve", "lc_extrap"] @@ -1118,57 
+1125,52 @@ "mapstats_preseq_stepsize": { "type": "integer", "default": 1000, - "description": "Specify the step size (i.e., sampling regularity) of Preseq.", - "help_text": "Can be used to configure the step size of Preseq's c_curve and lc_extrap method. Can be useful when few reads and allow Preseq to be used for extrapolation of shallow sequencing results.\n\n\n> Modifies tool parameter(s)\n> - preseq: `-s`", + "description": "Specify the step size (i.e., sampling regularity) of preseq.", + "help_text": "Specify the step size of preseq's c_curve and lc_extrap methods. This can be useful when few reads are present and allow preseq to be used for extrapolation of shallow sequencing results.\n\n\n> Modifies preseq parameter:\n> `-s`", "fa_icon": "fas fa-shoe-prints" }, "mapstats_preseq_terms": { "type": "integer", "default": 100, "fa_icon": "fas fa-sort-numeric-up-alt", - "help_text": "Specify the maximum number of terms that lc_extrap mode will use.\n \n> Modifies preseq lc_extrap parameter: `-x`", - "description": "Specify the maximum number of terms that lc_extrap mode will use." + "help_text": "Specify the maximum number of terms that preseq's lc_extrap mode will use.\n\n> Modifies preseq lc_extrap parameter: `-x`", + "description": "Specify the maximum number of terms that preseq's lc_extrap mode will use." }, "mapstats_preseq_maxextrap": { "type": "integer", "default": 10000000000, "fa_icon": "fas fa-ban", - "help_text": "Specify the maximum extrapolation that lc_extrap mode will perform.\n\n> Modifies preseq lc_extrap parameter: `-e`", - "description": "Specify the maximum extrapolation (lc_extrap mode only)" + "help_text": "Specify the maximum extrapolation that preseq's lc_extrap mode will perform.\n\n> Modifies preseq lc_extrap parameter: `-e`", + "description": "Specify the maximum extrapolation to use for preseq's lc_extrap mode." 
}, "mapstats_preseq_bootstrap": { "type": "integer", "default": 100, "fa_icon": "fab fa-bootstrap", - "help_text": "Specify the number of bootstraps lc_extrap mode will perform to calculate confidence intervals.\n\n> Modifies preseq lc_extrap parameter: `-n`", - "description": "Specify number of bootstraps to perform (lc_extrap mode only)" + "help_text": "Specify the number of bootstraps preseq's lc_extrap mode will perform to calculate confidence intervals.\n\n> Modifies preseq lc_extrap parameter: `-n`", + "description": "Specify number of bootstraps to perform in preseq's lc_extrap mode." }, "mapstats_preseq_cval": { "type": "number", "default": 0.95, "fa_icon": "fas fa-check-circle", - "help_text": "Specify the allowed level of confidence intervals used for lc_extrap mode.\n\n> Modifies preseq lc_extrap parameter: `-c`", - "description": "Specify confidence interval level (lc_extrap mode only)" + "help_text": "Specify the allowed level of confidence intervals used for preseq's lc_extrap mode.\n\n> Modifies preseq lc_extrap parameter: `-c`", + "description": "Specify confidence interval level for preseq's lc_extrap mode." 
}, "mapstats_preseq_defects_mode": { "type": "boolean", - "description": "Turns on defects mode to extrapolate without testing for defects (lc_extrap mode only).", - "help_text": "Activates defects mode of `lc_extrap`, which does the extrapolation without testing for defects.\n\n> Modifies preseq lc_extrap parameter: `-D`", - "fa_icon": "fab fa-creative-commons-sampling-plus" + "description": "Specify to turn on preseq defects mode to extrapolate without testing for defects in lc_extrap mode.", + "help_text": "Specify to activate defects mode of `preseq lc_extrap`, which runs the extrapolation without testing for defects.\n\n> Modifies preseq lc_extrap parameter: `-D`", + "fa_icon": "fas fa-power-off" }, "skip_qualimap": { - "type": "boolean" + "type": "boolean", + "description": "Specify to turn off coverage calculation with Qualimap.", + "fa_icon": "fas fa-forward" }, "snpcapture_bed": { "type": "string", - "description": "Path to snp capture in BED format. Provided file can also be gzipped." - }, - "elongation_factor": { - "type": "integer", - "default": 500, - "description": "Specify the number of bases to extend reference by (circularmapper only)", - "help_text": "The number of bases to extend the reference genome with. By default this is set to 500 if not specified otherwise.", - "fa_icon": "fas fa-external-link-alt" + "description": "Specify path to SNP capture positions in BED format for coverage calculations with Qualimap." } }, "fa_icon": "fas fa-search" @@ -1179,91 +1181,92 @@ "description": "Options for calculating and filtering for characteristic ancient DNA damage patterns.", "default": "", "fa_icon": "fas fa-chart-line", - "help_text": "More documentation can be seen in the follow links for:\n\n[DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler)\n\nIf using TSV input, DamageProfiler is performed per library, i.e. after lane merging. 
BAM Trimming is only performed on non-UDG and half-UDG treated data.", + "help_text": "More documentation can be found at the following links for:\n\n[DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler)\n\nIf using TSV input, DamageProfiler is performed per library, i.e. after lane merging. BAM Trimming is only performed on non-UDG and half-UDG treated data.", "properties": { "skip_damagecalculation": { "type": "boolean", "fa_icon": "fas fa-forward", - "help_text": "Turns off damage calculation to compute DNA damage profiles." + "help_text": "Specify to turn off computation of DNA damage profiles.", + "description": "Specify to turn off ancient DNA damage calculation." }, "damagecalculation_tool": { "type": "string", "default": "damageprofiler", "enum": ["damageprofiler", "mapdamage"], - "fa_icon": "fas fa-tools", + "fa_icon": "fas fa-hammer", "description": "Specify the tool to use for damage calculation.", "help_text": "Specify the tool to be used for damage calculation. DamageProfiler is generally faster than mapDamage2, but the latter has an option to limit the number of reads used. This can significantly speed up the processing of very large files, where the damage estimates are already accurate after processing only a fraction of the input." }, "damagecalculation_yaxis": { "type": "number", "default": 0.3, - "description": "Specify the maximum misincorporation frequency that should be displayed on damage plot. 
Set to 0 to 'autoscale'.", - "help_text": "Specifies what the maximum misincorporation frequency should be displayed as, in the damage plot.\n\n> Modifies DamageProfiler parameter: -yaxis_dp_max or mapDamage2 parameter: --ymax", + "description": "Specify the maximum misincorporation frequency that should be displayed on damage plot.", + "help_text": "Specify the maximum misincorporation frequency that should be displayed in the damage plot.\n\n> Modifies DamageProfiler parameter: `-yaxis_dp_max` or mapDamage2 parameter: `--ymax`", "fa_icon": "fas fa-ruler-combined" }, "damagecalculation_xaxis": { "type": "integer", "default": 25, "description": "Specify number of bases of each read to be considered for plotting damage estimation.", - "help_text": "Specifies the number of bases to be considered for plotting nucleotide misincorporations.\n\n> Modifies DamageProfiler parameter: -t or mapDamage2 parameter: -m\n", + "help_text": "Specify the number of bases to be considered for plotting nucleotide misincorporations.\n\n> Modifies DamageProfiler parameter: `-t` or mapDamage2 parameter: `-m`", "fa_icon": "far fa-chart-bar" }, "damagecalculation_damageprofiler_length": { "type": "integer", "default": 100, - "description": "Specifies the length filter for DamageProfiler.", - "help_text": "Number of bases which are considered for frequency computations, by default set to 100.`\n\n> Modifies DamageProfiler parameter: -l", - "fa_icon": "fas fa-sort-amount-down" + "description": "Specify the length filter for DamageProfiler.", + "help_text": "Specify the number of bases which are considered for frequency computations.\n\n> Modifies DamageProfiler parameter: `-l`", + "fa_icon": "fas fa-ruler-horizontal" }, "damagecalculation_mapdamage_downsample": { "type": "integer", "default": 0, "fa_icon": "fas fa-compress-alt", - "description": "Specify the maximum number of reads to consider for damage calculation. Defaults value is 0 (i.e. 
no downsampling is performed).", - "help_text": "The maximum number of reads used for damage calculation in mapDamage2. Can be used to significantly reduce the amount of time required for damage assessment. Note that a too low value can also obtain incorrect results.\n\n>Modifies mapDamage2 parameter: -n\n" + "description": "Specify the maximum number of reads to consider for damage calculation with mapDamage.", + "help_text": "Specify the maximum number of reads used for damage calculation in mapDamage2. This can be used to significantly reduce the amount of time required for damage assessment. Note that a too low value can also obtain incorrect results.\n\n> Modifies mapDamage2 parameter: `-n`" } } }, "feature_annotation_statistics": { "title": "Feature Annotation Statistics", "type": "object", - "description": "Options for getting reference annotation statistics (e.g. gene coverages)", + "description": "Options for calculating reference annotation statistics (e.g. gene coverages)", "default": "", "properties": { "run_bedtools_coverage": { "type": "boolean", - "description": "Turn on ability to calculate no. reads, depth and breadth coverage of features in reference.", - "fa_icon": "fas fa-chart-area", - "help_text": "Specifies to turn on the bedtools module, producing statistics for breadth (or percent coverage), and depth (or X fold) coverages.\n\n> Modifies tool parameter(s):\n- bedtools coverage: `-mean`" + "description": "Specify to turn on calculation of number of reads, depth and breadth coverage of features in reference with bedtools.", + "fa_icon": "fas fa-power-off", + "help_text": "Specify to turn on the bedtools module, producing statistics for breadth (or percent coverage), and depth (or X fold) coverages.\n\n> Modifies bedtools coverage parameter: `-mean`" }, "mapstats_bedtools_featurefile": { "type": "string", - "description": "Path to GFF or BED file containing positions of features in reference file (--fasta). 
Path should be enclosed in quotes.", + "description": "Specify path to GFF or BED file containing positions of features in reference file for bedtools.", "fa_icon": "fas fa-file-signature", "help_text": "Specify the path to a GFF/BED containing the feature coordinates (or any acceptable input for [`bedtools coverage`](https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html)). Must be in quotes.\n" } }, "fa_icon": "fas fa-scroll", - "help_text": "If you're interested in looking at coverage stats for certain features on your\nreference such as genes, SNPs etc., you can use the following bedtools module\nfor this purpose.\n\nMore documentation on bedtools can be seen in the [bedtools\ndocumentation](https://bedtools.readthedocs.io/en/latest/)\n\nIf using TSV input, bedtools is run after library merging of same-named library\nBAMs that have the same type of UDG treatment.\n" + "help_text": "If you're interested in looking at coverage stats for certain features on your reference such as genes, SNPs etc., you can use the following bedtools module for this purpose.\n\nMore documentation on bedtools can be seen in the [bedtools\ndocumentation](https://bedtools.readthedocs.io/en/latest/)\n\nIf using TSV input, bedtools is run after library merging of same-named library BAMs that have the same type of UDG treatment." }, "host_removal": { "title": "Host Removal", "type": "object", - "description": "", + "description": "Options for removing host-mapped reads", "default": "", "properties": { "run_host_removal": { "type": "boolean", - "description": "Turn on per-lane creation of pre-adapter-removal and/or read-pair-merging FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data)", - "help_text": "Recreates pre-adapter-removal and/or read-pair-merging FASTQ files but without reads that mapped to reference (e.g. 
for public upload of privacy-sensitive non-host data)", + "description": "Specify to turn on creation of pre-adapter-removal and/or read-pair-merging FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data).", + "help_text": "Specify to recreate pre-adapter-removal and/or read-pair-merging FASTQ files but without reads that mapped to reference (e.g. for public upload of privacy-sensitive non-host data)", "fa_icon": "fas fa-power-off" }, "host_removal_mode": { "type": "string", "default": "remove", - "description": "Host-mapped read removal mode. Remove mapped reads completely from FASTQ (remove) or just mask the host sequence of mapped reads with N (replace).", - "help_text": "Modifies extract_map_reads.py parameter: -m", - "fa_icon": "fas fa-plane-slash", + "description": "Specify the host-mapped read removal mode.", + "help_text": "Specify the host-mapped read removal mode.\n\n> Modifies extract_map_reads.py parameter: -m", + "fa_icon": "fas fa-toggle-on", "enum": ["remove", "replace"] } }, @@ -1272,56 +1275,56 @@ "contamination_estimation": { "title": "Contamination estimation", "type": "object", - "description": "Options for the estimation of contamination", + "description": "Options for the estimation of contamination in human data", "default": "", "fa_icon": "fas fa-radiation-alt", "properties": { "run_contamination_estimation_angsd": { "type": "boolean", - "description": "Turn on nuclear contamination estimation for genomes with ANGSD.", - "help_text": "Specify to run the optional processes for nuclear DNA contamination estimation with ANGSD.", + "description": "Specify to turn on nuclear contamination estimation for genomes with ANGSD.", + "help_text": "Specify to run nuclear DNA contamination estimation with ANGSD.", "fa_icon": "fas fa-power-off" }, "contamination_estimation_angsd_chrom_name": { "type": "string", "default": "X", - "description": "The name of the chromosome to be used for contamination 
estimation.", - "help_text": "The name of the chromosome as specified in your FASTA/bam header.\ne.g. 'X' for hs37d5, 'chrX' for HG19", + "description": "Specify the name of the chromosome to be used for contamination estimation with ANGSD.", + "help_text": "Specify the name of the chromosome to be used for contamination estimation with ANGSD as specified in your FASTA/BAM header, e.g. 'X' for hs37d5 or 'chrX' for hg19", "fa_icon": "fas fa-address-card" }, "contamination_estimation_angsd_range_from": { "type": "integer", "default": 5000000, - "description": "The first position on the chromosome to be used for contamination estimation with ANGSD.", - "help_text": "The beginning of the genetic range that should be utilised for nuclear contamination estimation.", + "description": "Specify the first position on the chromosome to be used for contamination estimation with ANGSD.", + "help_text": "Specify the beginning of the genetic range that should be utilised for nuclear contamination estimation with ANGSD.", "fa_icon": "fas fa-map-marker-alt" }, "contamination_estimation_angsd_range_to": { "type": "integer", "default": 154900000, - "help_text": "The end of the genetic range that should be utilised for nuclear contamination estimation.", - "description": "The last position on the chromosome to be used for contamination estimation with ANGSD.", + "help_text": "Specify the end of the genetic range that should be utilised for nuclear contamination estimation with ANGSD.", + "description": "Specify the last position on the chromosome to be used for contamination estimation with ANGSD.", "fa_icon": "fas fa-map-marker-alt" }, "contamination_estimation_angsd_mapq": { "type": "integer", "default": 30, - "help_text": "> Modifies angsd parameter: `-minMapQ`", + "help_text": "Specify the minimum mapping quality reads should have for contamination estimation with ANGSD.\n\n> Modifies ANGSD parameter: `-minMapQ`", "description": "Specify the minimum mapping quality reads should 
have for contamination estimation with ANGSD.", - "fa_icon": "fas fa-thermometer-full" + "fa_icon": "fas fa-filter" }, "contamination_estimation_angsd_minq": { "type": "integer", "default": 30, "description": "Specify the minimum base quality reads should have for contamination estimation with ANGSD.", - "help_text": "> Modifies angsd parameter: `-minQ`", - "fa_icon": "fas fa-ruler-vertical" + "help_text": "Specify the minimum base quality reads should have for contamination estimation with ANGSD.\n\n> Modifies ANGSD parameter: `-minQ`", + "fa_icon": "fas fa-filter" }, "contamination_estimation_angsd_hapmap": { "type": "string", - "default": "/Users/judith_ballesteros/Documents/GitHub/eager/assets/angsd_resources/HapMapChrX.gz", - "description": "Path to HapMap file of chromosome for contamination estimation..", - "help_text": "The haplotype map, or \"HapMap\", records the location of haplotype blocks and their tag SNPs.", + "default": "${projectDir}/assets/angsd_resources/HapMapChrX.gz", + "description": "Specify path to HapMap file of chromosome for contamination estimation with ANGSD.", + "help_text": "Specify a path to HapMap file of chromosome for contamination estimation with ANGSD. The haplotype map, or \"HapMap\", records the location of haplotype blocks and their tag SNPs.", "fa_icon": "fas fa-map" } } @@ -1329,20 +1332,20 @@ "human_sex_determination": { "title": "Human Sex Determination", "type": "object", - "description": "Options for the calculation of biological sex of human individuals.", + "description": "Options for the calculation of genetic sex of human individuals.", "default": "", "properties": { "run_sexdeterrmine": { "type": "boolean", - "fa_icon": "fas fa-transgender-alt", - "description": "Turn on sex determination for human reference genomes. This will run on single- and double-stranded variants of a library separately.", - "help_text": "Specify to run the optional process of sex determination." 
+ "fa_icon": "fas fa-power-off", + "description": "Specify to turn on sex determination for genomes mapped to human reference genomes with Sex.DetERRmine.", + "help_text": "Specify to run genetic sex determination." }, "sexdeterrmine_bedfile": { "type": "string", "fa_icon": "fas fa-bed", - "description": "Specify path to SNP panel in bed format for error bar calculation. Optional (see documentation).", - "help_text": "Specify an optional bedfile of the list of SNPs to be used for X-/Y-rate calculation. Running without this parameter will considerably increase runtime, and render the resulting error bars untrustworthy. Theoretically, any set of SNPs that are distant enough that two SNPs are unlikely to be covered by the same read can be used here. The programme was coded with the 1240K panel in mind." + "description": "Specify path to SNP panel in BED format for error bar calculation.", + "help_text": "Specify a BED file with SNPs to be used for X-/Y-rate calculation. Running without this parameter will considerably increase runtime, and render the resulting error bars untrustworthy. Theoretically, any set of SNPs that are distant enough that two SNPs are unlikely to be covered by the same read can be used here. The programme was coded with the 1240k panel in mind." } }, "fa_icon": "fas fa-transgender-alt", From 050acfd4d555c1b6f281af90cecdf3e27acb8e7d Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 5 Jul 2024 10:45:31 +0200 Subject: [PATCH 07/61] fix schema --- nextflow_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 06737bf47..4e443e435 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -611,6 +611,7 @@ "description": "Specify the number of bases to extend reference by (circularmapper only)", "help_text": "The number of bases to extend the reference genome with. 
By default this is set to 500 if not specified otherwise.", "fa_icon": "fas fa-external-link-alt" + } }, "fa_icon": "fas fa-layer-group" }, From d7a2d9232a2d8a3c6d8edb29dd7874a1bff73a7d Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 5 Jul 2024 15:12:34 +0200 Subject: [PATCH 08/61] WIP --- subworkflows/local/circularmapper.nf | 118 +++++++++--------- subworkflows/local/map.nf | 14 +-- .../local/reference_indexing_single.nf | 2 +- 3 files changed, 68 insertions(+), 66 deletions(-) diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index 73578bda1..b12dc5d55 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -4,76 +4,78 @@ include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' -include { BWA_ALN as BWA_ALN_CIRCULARMAPPER } from '../../modules/nf-core/bwa/aln/main' +include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' include { BWA_INDEX as BWA_INDEX_CIRCULARMAPPER } from '../../modules/nf-core/bwa/index/main' -include { BWA_SAMSE as BWA_SAMSE_CIRCULARMAPPER } from '../../modules/nf-core/bwa/samse/main' workflow CIRCULARMAPPER { + // TODO - PRepare input for FASTQ_ALIGN_BWAALN SWF, then use CIRCULARMAPPER_REALIGNSAMFILE file anf index output SAM file to emit. 
take: - fasta_reference // channel (mandatory): [ val(meta), path(reference) ] - eval // channel (mandatory): val(elongation value) - fastq_reads // channel (mandatory): [ val(meta), path(reads) ] + ch_reference // channel (mandatory): [ val(meta), path(reference) ] + elongation_value // channel (mandatory): val(elongation value) + fastq_reads // channel (mandatory): [ val(meta), path(reads) ] main: - ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + CIRCULARMAPPER_CIRCULARGENERATOR(ch_reference, elongation_value) + ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) - ch_reference = fasta_reference - ch_eval = eval - - CIRCULARMAPPER_CIRCULARGENERATOR(ch_reference, ch_eval) - ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) - - BWA_INDEX_CIRCULARMAPPER(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta) - ch_versions = ch_versions.mix( BWA_INDEX_CIRCULARMAPPER.out.versions.first() ) - - ch_input_bwa_aln = fastq_reads - .map { - // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute - addNewMetaFromAttributes( it, "reference" , "reference" , false ) - } - .groupTuple(by:0) - .combine( BWA_INDEX_CIRCULARMAPPER.out.index, by: 0 ) // [ [meta], fastq, bai ] - .multiMap { - combo_meta, metas, fastq, ref_bai, bai -> - def ids = metas.collect { meta -> meta.id } - fastqs: [ combo_meta + [id: ids], fastq ] - bai: [ ref_bai, bai ] - } - - BWA_ALN_CIRCULARMAPPER(ch_input_bwa_aln) - ch_versions = ch_versions.mix( BWA_ALN_CIRCULARMAPPER.out.versions.first() ) - - ch_input_bwa_samse = ch_input_bwa_aln - .combine( BWA_ALN_CIRCULARMAPPER.out.sai, by: 0 ) // [ [meta], fastq, bai, sai ] - .multiMap { - metas, fastq, ref_bai, bai, ref_sai, sai -> - fastqs: [ metas, fastq, sai ] - bai: [ ref_bai, bai ] - } - - BWA_SAMSE_CIRCULARMAPPER(ch_input_bwa_samse) - ch_versions = ch_versions.mix( BWA_SAMSE_CIRCULARMAPPER.out.versions.first() ) - - ch_input_realignsamfile 
= BWA_SAMSE_CIRCULARMAPPER.out.bam - .combine(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta, by: 0) - .combine(ch_eval) - .multiMap { - ref_bam, bam, ref_fasta, fasta, ch_eval -> - bam: [ ref_bam, bam ] - fasta: [ ref_fasta, fasta ] - eval: [ ch_eval ] - } - - CIRCULARMAPPER_REALIGNSAMFILE(ch_input_realignsamfile) - ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) + BWA_INDEX_CIRCULARMAPPER(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta) + ch_versions = ch_versions.mix( BWA_INDEX_CIRCULARMAPPER.out.versions.first() ) - emit: + ch_reference_for_bwa = BWA_INDEX_CIRCULARMAPPER.out.index + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "id" , "reference" , false ) + } - bam = CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] - versions = ch_versions // channel: [ path(versions.yml) ] + ch_input_bwa_aln = fastq_reads + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .groupTuple(by:0) + .combine( ch_reference_for_bwa, by: 0 ) + .dump(tag:"ch_input_bwa_aln") + // .multiMap { + // combo_meta, meta, fastq, ref_meta, ref_index -> + // def ids = metas.collect { meta -> meta.id } + // reads: [ combo_meta + [id: ids], fastq ] + // index: [ ref_bai, bai ] + // } + + // BWA_ALN_CIRCULARMAPPER(ch_input_bwa_aln) + // ch_versions = ch_versions.mix( BWA_ALN_CIRCULARMAPPER.out.versions.first() ) + + // ch_input_bwa_samse = ch_input_bwa_aln + // .combine( BWA_ALN_CIRCULARMAPPER.out.sai, by: 0 ) // [ [meta], fastq, bai, sai ] + // .multiMap { + // metas, fastq, ref_bai, bai, ref_sai, sai -> + // fastqs: [ metas, fastq, sai ] + // bai: [ ref_bai, bai ] + // } + + // BWA_SAMSE_CIRCULARMAPPER(ch_input_bwa_samse) + // ch_versions = ch_versions.mix( BWA_SAMSE_CIRCULARMAPPER.out.versions.first() ) + + // 
ch_input_realignsamfile = BWA_SAMSE_CIRCULARMAPPER.out.bam + // .combine(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta, by: 0) + // .combine(ch_eval) + // .multiMap { + // ref_bam, bam, ref_fasta, fasta, ch_eval -> + // bam: [ ref_bam, bam ] + // fasta: [ ref_fasta, fasta ] + // eval: [ ch_eval ] + // } + + // CIRCULARMAPPER_REALIGNSAMFILE(ch_input_realignsamfile) + // ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) + + emit: + + // bam = CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index f738161c5..7c5addd81 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -120,17 +120,17 @@ workflow MAP { ch_input_for_circularmapper = reads .combine(index.map{ meta, index, fasta -> [ meta, fasta ] }) - .combine(ch_eval) + .dump(tag:"CM Inputs", pretty:true) .multiMap { meta, reads, meta2, fasta, eval -> reads: [ meta, reads ] - index: [ meta2, fasta ] - elon: [ eval ] + reference: [ meta2, fasta ] } - CIRCULARMAPPER(ch_input_for_circularmapper) - ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions.first() ) - ch_mapped_bam = CIRCULARMAPPER.out.bam - ch_mapped_bai = Channel.empty() // Circularmapper doesn't give a bai + CIRCULARMAPPER( ch_input_for_circularmapper.reads, params.elongation_factor, ch_input_for_circularmapper.reference ) + ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) + // TODO - Update SWF outputs + ch_mapped_lane_bam = CIRCULARMAPPER.out.bam + ch_mapped_lane_bai = Channel.empty() // Circularmapper doesn't give a bai } diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index a21b4727d..778352680 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -52,7 +52,7 @@ workflow REFERENCE_INDEXING_SINGLE { } // 
Generate mapper indicies if not supplied, and if supplied generate meta - if ( params.mapping_tool == 'bwaaln' || params.mapping_tool == 'bwamem' ){ + if ( params.mapping_tool == 'bwaaln' || params.mapping_tool == 'bwamem' || params.mapping_tool == 'circularmapper' ){ if ( !fasta_mapperindexdir ) { ch_fasta_mapperindexdir = BWA_INDEX ( ch_ungz_ref ).index From 6caacef3886670c3d262f283345eb31c26286a2f Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 5 Jul 2024 15:22:14 +0200 Subject: [PATCH 09/61] fix swf emissions --- subworkflows/local/circularmapper.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index b12dc5d55..1498b6a3d 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -75,7 +75,7 @@ workflow CIRCULARMAPPER { emit: - // bam = CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] + bam = channel.empty() //CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] versions = ch_versions // channel: [ path(versions.yml) ] } From 670771d05db0465288b8da3723a692942f41b8dd Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 12 Jul 2024 11:37:42 +0200 Subject: [PATCH 10/61] add elongate_reference swf --- subworkflows/local/elongate_reference.nf | 74 ++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 subworkflows/local/elongate_reference.nf diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf new file mode 100644 index 000000000..8c949ba4d --- /dev/null +++ b/subworkflows/local/elongate_reference.nf @@ -0,0 +1,74 @@ +// +// Elongate a reference genome by circularising the target sequence by a given elongation factor. 
+// + +include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' +include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' + +workflow ELONGATE_REFERENCE { + take: + ch_reference // [ meta, fasta, fai ] + ch_elongated_reference // [ meta, elongated_fasta, elongated_fai ] + elongation_factor // [ int ] + // TODO CIRCULARMAPPER_CIRCULARGENERATOR module needs updating. `-s` option is the circular target and not the output file >.< + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + /* + Check what fasta files we have: + There are four options: + 1. Elongated reference with index (ignore circular target) + 2. Elongated reference without index (ignore circular target) + 3. No elongated reference, but circular target + 4. None of the above -> Throw error (should go in parameter validation) + */ + ch_circulargenerator_input = ch_elongated_reference + .branch{ + meta, elongated_fasta_index, elongated_fasta, circular_target -> + ready: elongated_fasta != "" && elongated_fasta_index != "" + needs_index: elongated_fasta != "" && elongated_fasta_index == "" + needs_elongation: elongated_fasta == "" && circular_target != "" + } + + // Elongate references that need it + // Join the original references to the branch of needs_elongation, to get the original fasta files, and elongate them. + ch_references_to_elongate = ch_circulargenerator_input.needs_elongation + .join( ch_reference ) + .map { + meta, elongated_fasta_index, elongated_fasta, circular_target, meta2, index, fasta -> + [ meta, fasta ] + } + + CIRCULARMAPPER_CIRCULARGENERATOR(ch_circulargenerator_input.needs_elongation, elongation_value) + ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) + + // Collect newly generated circular references and provided ones without an index, and index them. 
+ ch_input_for_circular_indexing = ch_circulargenerator_input.needs_index + .map { + meta, elongated_fasta_index, elongated_fasta, circular_target -> + [ meta, elongated_fasta ] + } + .mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta ) + + BWA_INDEX_CIRCULARISED(ch_input_for_circular_indexing) + ch_versions = ch_versions.mix( BWA_INDEX_CIRCULARISED.out.versions.first() ) + + ch_indexed_references = ch_input_for_circular_indexing + .join( BWA_INDEX_CIRCULARISED.out.index ) + + // Then put all the indexed elongated references together and emit them + ch_circular_reference = ch_circulargenerator_input.ready + .map { + meta, elongated_fasta_index, elongated_fasta, circular_target -> + [ meta, elongated_fasta, elongated_fasta_index ] + } + .mix( ch_indexed_references ) + + emit: + circular_reference = ch_circular_reference // [ meta, fasta, fai ] + versions = ch_versions + mqc = ch_multiqc_files + +} From 21093ac94b89a6afa31116fc8df516ba684f0bd3 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 12 Jul 2024 11:57:13 +0200 Subject: [PATCH 11/61] wip n CM mapping --- subworkflows/local/map.nf | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 7c5addd81..1ffacf706 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -16,8 +16,9 @@ include { CIRCULARMAPPER } from '../../subworkflo workflow MAP { take: - reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] - index // [ [meta], [ index ], [ fasta ] ] + reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] + index // [ [meta], [ index ], [ fasta ] ] + elogated_index // [ [meta], [ index ], [ fasta ], [ circular_target ] ] main: ch_versions = Channel.empty() @@ -116,21 +117,22 @@ workflow MAP { ch_mapped_lane_bai = params.fasta_largeref ? 
SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai } else if ( params.mapping_tool == 'circularmapper' ) { - ch_eval = params.elongation_factor - - ch_input_for_circularmapper = reads - .combine(index.map{ meta, index, fasta -> [ meta, fasta ] }) - .dump(tag:"CM Inputs", pretty:true) - .multiMap { - meta, reads, meta2, fasta, eval -> - reads: [ meta, reads ] - reference: [ meta2, fasta ] - } - CIRCULARMAPPER( ch_input_for_circularmapper.reads, params.elongation_factor, ch_input_for_circularmapper.reference ) - ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) - // TODO - Update SWF outputs - ch_mapped_lane_bam = CIRCULARMAPPER.out.bam - ch_mapped_lane_bai = Channel.empty() // Circularmapper doesn't give a bai + // Reference elongation and indexing takes place in the reference_indexing swf. + // Circularmapper takes non-elongated AND elongated references and reads as input (i think. wait for Alex's reply). + + // ch_input_for_circularmapper = reads + // .combine(index.map{ meta, index, fasta -> [ meta, fasta ] }) + // .dump(tag:"CM Inputs", pretty:true) + // .multiMap { + // meta, reads, meta2, fasta -> + // reads: [ meta, reads ] + // reference: [ meta2, fasta ] + // } + // CIRCULARMAPPER( ch_input_for_circularmapper.reads, params.elongation_factor, ch_input_for_circularmapper.reference ) + // ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) + // // TODO - Update SWF outputs + // ch_mapped_lane_bam = CIRCULARMAPPER.out.bam + // ch_mapped_lane_bai = Channel.empty() // Circularmapper doesn't give a bai } From a6a145aa4502214318e14fa8c4d613223c3f8791 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 12 Jul 2024 11:59:12 +0200 Subject: [PATCH 12/61] add elongated fasta and fai input --- assets/schema_fasta.json | 14 ++++ nextflow.config | 34 ++++---- nextflow_schema.json | 16 +++- subworkflows/local/map.nf | 4 +- subworkflows/local/reference_indexing.nf | 47 ++++++++--- .../local/reference_indexing_multi.nf | 80 ++++++++++--------- 
.../local/reference_indexing_single.nf | 12 ++- 7 files changed, 133 insertions(+), 74 deletions(-) diff --git a/assets/schema_fasta.json b/assets/schema_fasta.json index d53bca776..d89310422 100644 --- a/assets/schema_fasta.json +++ b/assets/schema_fasta.json @@ -48,6 +48,20 @@ "pattern": "^\\S+$", "errorMessage": "The headers of the chromosome to be extended by circularmapper must not contain any spaces and no leading '>'." }, + "circularmapper_elongated_fasta": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.f(na|asta|a|as)(\\.gz)?$", + "exists": true, + "errorMessage": "The elongated Fasta files for the mapping reference must be provided with file extensions '.fasta', '.fa', '.fas', '.fna', '.fasta.gz','.fa.gz','.fas.gz', '.fna.gz' and cannot contain any spaces." + }, + "circularmapper_elongated_fai": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.fai$", + "exists": true, + "errorMessage": "Elongated fasta index files for the mapping reference cannot have any spaces and must have file extension '.fai'." + }, "mitochondrion_header": { "type": "string", "pattern": "^\\S+$", diff --git a/nextflow.config b/nextflow.config index 63663b004..467aeba3f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -108,20 +108,23 @@ params { preprocessing_adapterremoval_qualitymax = 41 // Mapping - mapping_tool = 'bwaaln' - mapping_bwaaln_n = 0.01 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwaaln_k = 2 - mapping_bwaaln_l = 1024 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwaaln_o = 2 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwamem_k = 19 - mapping_bwamem_r = 1.5 - mapping_bowtie2_alignmode = 'local' - mapping_bowtie2_sensitivity = 'sensitive' - mapping_bowtie2_n = 0 - mapping_bowtie2_l = 20 - mapping_bowtie2_trim5 = 0 - mapping_bowtie2_trim3 = 0 - mapping_bowtie2_maxins = 500 + mapping_tool = 'bwaaln' + mapping_bwaaln_n = 0.01 // From Oliva et al. 
2021 (10.1093/bib/bbab076) + mapping_bwaaln_k = 2 + mapping_bwaaln_l = 1024 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwaaln_o = 2 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwamem_k = 19 + mapping_bwamem_r = 1.5 + mapping_bowtie2_alignmode = 'local' + mapping_bowtie2_sensitivity = 'sensitive' + mapping_bowtie2_n = 0 + mapping_bowtie2_l = 20 + mapping_bowtie2_trim5 = 0 + mapping_bowtie2_trim3 = 0 + mapping_bowtie2_maxins = 500 + mapping_circularmapper_elongation_factor = 500 + mapping_circularmapper_elongated_fasta = null + mapping_circularmapper_elongated_fai = null // BAM Filtering run_bamfiltering = false @@ -179,9 +182,6 @@ params { mapstats_preseq_cval = 0.95 mapstats_preseq_defects_mode = false - //Circular Mapper - elongation_factor = 500 - // Damage Calculation options skip_damagecalculation = false damagecalculation_tool = 'damageprofiler' diff --git a/nextflow_schema.json b/nextflow_schema.json index 4e443e435..f87749bd8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -605,12 +605,24 @@ "help_text": "The maximum fragment for valid paired-end alignments. Only for paired-end mapping (i.e. unmerged), and therefore typically only useful for modern data.\n\n> Modifies Bowtie2 parameter: `--maxins`", "fa_icon": "fas fa-exchange-alt" }, - "elongation_factor": { + "mapping_circularmapper_elongation_factor": { "type": "integer", "default": 500, - "description": "Specify the number of bases to extend reference by (circularmapper only)", + "description": "Specify the number of bases to extend reference by (circularmapper only).", "help_text": "The number of bases to extend the reference genome with. 
By default this is set to 500 if not specified otherwise.", "fa_icon": "fas fa-external-link-alt" + }, + "mapping_circularmapper_elongated_fasta": { + "type": "string", + "description": "Specify an elongated reference FASTA to be used for circularmapper.", + "help_text": "Specify an already elongated FASTA file for circularmapper to avoid reprocessing.", + "fa_icon": "fas fa-address-book" + }, + "mapping_circularmapper_elongated_fai": { + "type": "string", + "description": "Specify a samtools index for the elongated FASTA file.", + "help_text": "Specify the index for an already elongated FASTA file to avoid reprocessing.", + "fa_icon": "fas fa-address-book" } }, "fa_icon": "fas fa-layer-group" diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 7c5addd81..88c9d58a2 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -116,7 +116,7 @@ workflow MAP { ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai } else if ( params.mapping_tool == 'circularmapper' ) { - ch_eval = params.elongation_factor + ch_eval = params.mapping_circularmapper_elongation_factor ch_input_for_circularmapper = reads .combine(index.map{ meta, index, fasta -> [ meta, fasta ] }) @@ -126,7 +126,7 @@ workflow MAP { reads: [ meta, reads ] reference: [ meta2, fasta ] } - CIRCULARMAPPER( ch_input_for_circularmapper.reads, params.elongation_factor, ch_input_for_circularmapper.reference ) + CIRCULARMAPPER( ch_input_for_circularmapper.reads, params.mapping_circularmapper_elongation_factor, ch_input_for_circularmapper.reference ) ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) // TODO - Update SWF outputs ch_mapped_lane_bam = CIRCULARMAPPER.out.bam diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index cdf8d0e90..02a2b2621 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -7,6 +7,7 @@ include { 
REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_i include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf' include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_CM_FASTA } from '../../modules/nf-core/gunzip/main.nf' workflow REFERENCE_INDEXING { take: @@ -20,12 +21,13 @@ workflow REFERENCE_INDEXING { // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( fasta_fai || fasta_dict || fasta_mapperindexdir )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as e.g. `--fasta_fai`. --fasta_sheet CSV/TSV takes priority and --fasta_* parameters will be ignored.") - if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile || params.sexdeterrmine_bedfile || params.mapstats_bedtools_featurefile || params.genotyping_reference_ploidy || params.genotyping_gatk_dbsnp )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. 
Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.") + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile || params.sexdeterrmine_bedfile || params.mapstats_bedtools_featurefile || params.genotyping_reference_ploidy || params.genotyping_gatk_dbsnp, params.fasta_circular_target, params.circularmapper_elongated_fasta, params.circularmapper_elongated_fai )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.") if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { // If input (multi-)reference sheet supplied REFERENCE_INDEXING_MULTI ( fasta ) ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference + ch_circularmapper = REFERENCE_INDEXING_MULTI.out.circularmapper ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_MULTI.out.pmd_masked_fasta @@ -39,6 +41,7 @@ workflow REFERENCE_INDEXING { } else { // If input FASTA and/or indicies supplied REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) + ch_circularmapper = REFERENCE_INDEXING_SINGLE.out.circularmapper ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_SINGLE.out.pmd_masked_fasta @@ -125,17 +128,39 @@ workflow REFERENCE_INDEXING { ch_dbsnp = ch_dbsnp .filter { it[1] != "" 
} + ch_circularmapper_for_gunzip = ch_circularmapper + .filter{ it[1] != "" || it[2] != "" || it[3] != "" } + .branch{ + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + forgunzip: circularmapper_elongated_fasta.extension == "gz" + skip: true + } + + ch_circularmapper_input = ch_circularmapper_for_gunzip.gunzip + .multiMap{ + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + gunzip: [ meta, circularmapper_elongated_fasta ] + remainder: [ meta, circular_target, circularmapper_elongated_fai ] + } + + GUNZIP_CM_FASTA( ch_circularmapper_input ) + ch_version = ch_versions.mix( GUNZIP_CM_FASTA.out.versions.first() ) + + ch_gunzipped_elongated = GUNZIP_CM_FASTA.out.gunzip.join( ch_circularmapper_input.remainder, failOnMismatch: true ) + ch_circularmapper_gunzipped = ch_circularmapper_for_gunzip.skip.mix( ch_gunzipped_elongated ) + emit: - reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] - mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] - hapmap = ch_hapmap // [ meta, hapmap ] - pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] - pmd_bed_for_masking = ch_pmd_bed_for_masking // [ meta, pmd_bed_for_masking ] - snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] - pileupcaller_bed_snp = ch_pileupcaller_bed_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] - sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] - bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] - dbsnp = ch_dbsnp // [ meta, dbsnp ] + reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] + circularmapper = ch_circularmapper_gunzipped // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] + hapmap = ch_hapmap // [ meta, hapmap ] + 
pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] + pmd_bed_for_masking = ch_pmd_bed_for_masking // [ meta, pmd_bed_for_masking ] + snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] + pileupcaller_bed_snp = ch_pileupcaller_bed_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] + sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] + dbsnp = ch_dbsnp // [ meta, dbsnp ] versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 6a42d9208..f71280bc6 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -20,23 +20,25 @@ workflow REFERENCE_INDEXING_MULTI { // Import reference sheet and change empty arrays to empty strings for compatibility with single reference input ch_splitreferencesheet_for_branch = Channel.fromSamplesheet("fasta_sheet") .map{ - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> - meta.ploidy = meta.genotyping_ploidy != null ? meta.genotyping_ploidy : params.genotyping_reference_ploidy - fai = fai != [] ? fai : "" - dict = dict != [] ? dict : "" - mapper_index = mapper_index != [] ? mapper_index : "" - circular_target = circular_target != [] ? circular_target : "" - mitochondrion = mitochondrion != [] ? mitochondrion : "" - capture_bed = capture_bed != [] ? capture_bed : "" - pileupcaller_bed = pileupcaller_bed != [] ? pileupcaller_bed : "" - pileupcaller_snp = pileupcaller_snp != [] ? pileupcaller_snp : "" - hapmap = hapmap != [] ? hapmap : "" - pmd_masked_fasta = pmd_masked_fasta != [] ? pmd_masked_fasta : "" - pmd_bed_for_masking = pmd_bed_for_masking != [] ? pmd_bed_for_masking : "" - sexdet_bed = sexdet_bed != [] ? 
sexdet_bed : "" - bedtools_feature = bedtools_feature != [] ? bedtools_feature : "" - genotyping_gatk_dbsnp = genotyping_gatk_dbsnp != [] ? genotyping_gatk_dbsnp : "" - [ meta - meta.subMap('genotyping_ploidy'), fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] + meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> + meta.ploidy = meta.genotyping_ploidy != null ? meta.genotyping_ploidy : params.genotyping_reference_ploidy + fai = fai != [] ? fai : "" + dict = dict != [] ? dict : "" + mapper_index = mapper_index != [] ? mapper_index : "" + circular_target = circular_target != [] ? circular_target : "" + circularmapper_elongated_fasta = circularmapper_elongated_fasta != [] ? circularmapper_elongated_fasta : "" + circularmapper_elongated_fai = circularmapper_elongated_fai != [] ? circularmapper_elongated_fai : "" + mitochondrion = mitochondrion != [] ? mitochondrion : "" + capture_bed = capture_bed != [] ? capture_bed : "" + pileupcaller_bed = pileupcaller_bed != [] ? pileupcaller_bed : "" + pileupcaller_snp = pileupcaller_snp != [] ? pileupcaller_snp : "" + hapmap = hapmap != [] ? hapmap : "" + pmd_masked_fasta = pmd_masked_fasta != [] ? pmd_masked_fasta : "" + pmd_bed_for_masking = pmd_bed_for_masking != [] ? pmd_bed_for_masking : "" + sexdet_bed = sexdet_bed != [] ? sexdet_bed : "" + bedtools_feature = bedtools_feature != [] ? bedtools_feature : "" + genotyping_gatk_dbsnp = genotyping_gatk_dbsnp != [] ? 
genotyping_gatk_dbsnp : "" + [ meta - meta.subMap('genotyping_ploidy'), fasta, fai, dict, mapper_index, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] } // GENERAL DESCRIPTION FOR NEXT SECTIONS @@ -52,8 +54,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_input_from_referencesheet = ch_splitreferencesheet_for_branch .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> - generated: [ meta, fasta, fai, dict, mapper_index, circular_target ] + meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> + generated: [ meta, fasta, fai, dict, mapper_index ] + circularmapper: [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mitochondrion_header: [ meta, mitochondrion ] angsd_hapmap: [ meta, hapmap ] pmd_masked_fasta: [ meta, pmd_masked_fasta ] @@ -68,7 +71,7 @@ workflow REFERENCE_INDEXING_MULTI { // Detect if fasta is gzipped or not ch_fasta_for_gunzip = ch_input_from_referencesheet.generated .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> forgunzip: fasta.extension == "gz" skip: true } @@ -76,9 +79,9 @@ workflow REFERENCE_INDEXING_MULTI { // Pull out name/file to match cardinality for GUNZIP module ch_gunzip_input = ch_fasta_for_gunzip.forgunzip .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> gunzip: [ meta, fasta ] - 
remainder: [ meta, fai, dict, mapper_index, circular_target ] + remainder: [ meta, fai, dict, mapper_index ] } @@ -96,7 +99,7 @@ workflow REFERENCE_INDEXING_MULTI { // Separate out non-faidxed references ch_fasta_for_faidx = ch_fasta_for_faiindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> forfaidx: fai == "" skip: true } @@ -105,9 +108,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidx_input = ch_fasta_for_faidx .forfaidx .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> faidx: [ meta, fasta ] - remainder: [ meta, fasta, dict, mapper_index, circular_target ] // we drop fai here as we are going to make it + remainder: [ meta, fasta, dict, mapper_index ] // we drop fai here as we are going to make it } SAMTOOLS_FAIDX ( ch_faidx_input.faidx, [ [], [] ] ) @@ -117,9 +120,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidxed_formix = SAMTOOLS_FAIDX.out.fai .join( ch_faidx_input.remainder, failOnMismatch: true ) .map { - meta, fai, fasta, dict, mapper_index, circular_target -> + meta, fai, fasta, dict, mapper_index -> - [ meta, fasta, fai, dict, mapper_index, circular_target ] + [ meta, fasta, fai, dict, mapper_index ] } // Mix back newly faidx'd references with the pre-indexed ones @@ -131,7 +134,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_dict = ch_fasta_for_dictindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> fordict: dict == "" skip: true } @@ -139,9 +142,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_dict_input = ch_fasta_for_dict .fordict .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> dict: [ meta, fasta ] - remainder: [ meta, fasta, fai, mapper_index, circular_target ] + remainder: [ meta, fasta, fai, mapper_index ] } PICARD_CREATESEQUENCEDICTIONARY ( ch_dict_input.dict ) @@ -150,9 +153,9 @@ 
workflow REFERENCE_INDEXING_MULTI { ch_dicted_formix = PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict .join( ch_dict_input.remainder, failOnMismatch: true ) .map { - meta, dict, fasta, fai, mapper_index, circular_target -> + meta, dict, fasta, fai, mapper_index -> - [ meta, fasta, fai, dict, mapper_index, circular_target ] + [ meta, fasta, fai, dict, mapper_index ] } ch_dict_formapperindexing = ch_fasta_for_dict.skip.mix(ch_dicted_formix) @@ -165,7 +168,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_mapperindex = ch_dict_formapperindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> forindex: mapper_index == "" skip: true } @@ -173,9 +176,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_mapindex_input = ch_fasta_for_mapperindex .forindex .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target -> + meta, fasta, fai, dict, mapper_index -> index: [ meta, fasta ] - remainder: [ meta, fasta, fai, dict, circular_target ] + remainder: [ meta, fasta, fai, dict ] } if ( params.mapping_tool == "bwaaln" || params.mapping_tool == "bwamem" ) { @@ -193,15 +196,16 @@ workflow REFERENCE_INDEXING_MULTI { ch_indexed_formix = ch_indexed_forremap .join( ch_mapindex_input.remainder, failOnMismatch: true ) .map { - meta, mapper_index, fasta, fai, dict, circular_target -> + meta, mapper_index, fasta, fai, dict -> - [ meta, fasta, fai, dict, mapper_index, circular_target ] + [ meta, fasta, fai, dict, mapper_index ] } ch_indexmapper_for_reference = ch_fasta_for_mapperindex.skip.mix(ch_indexed_formix) emit: - reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex ] + circularmapper = ch_input_from_referencesheet.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mitochondrion_header = 
ch_input_from_referencesheet.mitochondrion_header // [ meta, mitochondrion ] hapmap = ch_input_from_referencesheet.angsd_hapmap // [ meta, hapmap ] pmd_masked_fasta = ch_input_from_referencesheet.pmd_masked_fasta // [ meta, pmd_masked_fasta ] diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index 778352680..cddb45a63 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -90,13 +90,16 @@ workflow REFERENCE_INDEXING_SINGLE { def bedtools_feature = params.mapstats_bedtools_featurefile != null ? file(params.mapstats_bedtools_featurefile, checkIfExists: true ) : "" def genotyping_reference_ploidy = params.genotyping_reference_ploidy def genotyping_gatk_dbsnp = params.genotyping_gatk_dbsnp != null ? file(params.genotyping_gatk_dbsnp, checkIfExists: true ) : "" - [ meta + [ ploidy: genotyping_reference_ploidy ], fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] + def circularmapper_elongated_fasta = params.mapping_circularmapper_elongated_fasta != null ? file( params.mapping_circularmapper_elongated_fasta, checkIfExists: true ) : "" + def circularmapper_elongated_fai = params.mapping_circularmapper_elongated_fai != null ? 
file( params.mapping_circularmapper_elongated_fai, checkIfExists: true ) : "" + [ meta + [ ploidy: genotyping_reference_ploidy ], fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_fai ] } ch_ref_index_single = ch_reference_for_mapping .multiMap{ - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> - reference: [ meta, fasta, fai, dict, mapper_index, circular_target ] + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + reference: [ meta, fasta, fai, dict, mapper_index ] + circularmapper: [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mito_header: [ meta, mitochondrion_header ] hapmap: [ meta, contamination_estimation_angsd_hapmap ] pmd_masked_fasta: [ meta, pmd_masked_fasta ] @@ -109,7 +112,8 @@ workflow REFERENCE_INDEXING_SINGLE { } emit: - reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex ] + circularmapper = ch_ref_index_single.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mitochondrion_header = ch_ref_index_single.mito_header // [ meta, mito_header ] hapmap = ch_ref_index_single.hapmap // [ 
meta, hapmap ] pmd_masked_fasta = ch_ref_index_single.pmd_masked_fasta // [ meta, pmd_masked_fasta ] From cd867490a750afc68529a80849eae2f787961017 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 12 Jul 2024 15:44:01 +0200 Subject: [PATCH 13/61] attempt to add CM SWF --- subworkflows/local/circularmapper.nf | 112 +++++++++++---------------- subworkflows/local/map.nf | 29 +++++++ 2 files changed, 75 insertions(+), 66 deletions(-) diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index 1498b6a3d..0ca399110 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -2,80 +2,60 @@ // Run circularmapper // -include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' -include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' -include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' -include { BWA_INDEX as BWA_INDEX_CIRCULARMAPPER } from '../../modules/nf-core/bwa/index/main' +include { FASTQ_ALIGN_BWAALN_ELONGATED } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' workflow CIRCULARMAPPER { - - // TODO - PRepare input for FASTQ_ALIGN_BWAALN SWF, then use CIRCULARMAPPER_REALIGNSAMFILE file anf index output SAM file to emit. take: - ch_reference // channel (mandatory): [ val(meta), path(reference) ] - elongation_value // channel (mandatory): val(elongation value) - fastq_reads // channel (mandatory): [ val(meta), path(reads) ] + ch_reference // channel (mandatory): [ val(meta), path(index), path(reference) ] + ch_elongated_index // channel (mandatory): [ val(meta), path(elongated_index) ] + ch_fastq_reads // channel (mandatory): [ val(meta), path(reads) ]. subworkImportant: meta REQUIRES single_end` entry! 
+ val_elongation_factor // int (mandatory): Elongation factor used for chromosome circularisation main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + ch_realigned_bams = Channel.empty() + + // While mapping with BWA will need the elongated reference index, RealignSAMFile apparently does NOT need the elongated reference to be present, only the elongation factor. + FASTQ_ALIGN_BWAALN_ELONGATED( ch_fastq_reads, ch_elongated_index ) + ch_versions = ch_versions.mix( FASTQ_ALIGN_BWAALN_ELONGATED.out.versions.first() ) + + ch_ref_for_realignsamfile = ch_reference + .map { + meta, index, reference -> + [ meta, reference ] + } + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "id" , "reference" , false ) + } + + ch_input_for_realignsamfile = FASTQ_ALIGN_BWAALN_ELONGATED.out.bam + .map{ + // create meta consistent with rest of workflow + meta, bam -> + new_meta = meta + [ reference: meta.id_index ] + [ new_meta, bam ] + } + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .combine( ch_ref_for_realignsamfile, by: 0 ) + .multiMap { + ignore_me, meta, bam, ref_meta, ref_index, ref_fasta -> + bam: [ metas, bam ] + fasta: [ ref_meta, ref_fasta ] + } + + CIRCULARMAPPER_REALIGNSAMFILE( ch_input_for_realignsamfile.bam, ch_input_for_realignsamfile.fasta, val_elongation_factor ) + ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) + ch_realigned_bams = ch_realigned_bams.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.bam ) - CIRCULARMAPPER_CIRCULARGENERATOR(ch_reference, elongation_value) - ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) - - BWA_INDEX_CIRCULARMAPPER(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta) - ch_versions = ch_versions.mix( 
BWA_INDEX_CIRCULARMAPPER.out.versions.first() ) - - ch_reference_for_bwa = BWA_INDEX_CIRCULARMAPPER.out.index - .map { - // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute - addNewMetaFromAttributes( it, "id" , "reference" , false ) - } - - ch_input_bwa_aln = fastq_reads - .map { - // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute - addNewMetaFromAttributes( it, "reference" , "reference" , false ) - } - .groupTuple(by:0) - .combine( ch_reference_for_bwa, by: 0 ) - .dump(tag:"ch_input_bwa_aln") - // .multiMap { - // combo_meta, meta, fastq, ref_meta, ref_index -> - // def ids = metas.collect { meta -> meta.id } - // reads: [ combo_meta + [id: ids], fastq ] - // index: [ ref_bai, bai ] - // } - - // BWA_ALN_CIRCULARMAPPER(ch_input_bwa_aln) - // ch_versions = ch_versions.mix( BWA_ALN_CIRCULARMAPPER.out.versions.first() ) - - // ch_input_bwa_samse = ch_input_bwa_aln - // .combine( BWA_ALN_CIRCULARMAPPER.out.sai, by: 0 ) // [ [meta], fastq, bai, sai ] - // .multiMap { - // metas, fastq, ref_bai, bai, ref_sai, sai -> - // fastqs: [ metas, fastq, sai ] - // bai: [ ref_bai, bai ] - // } - - // BWA_SAMSE_CIRCULARMAPPER(ch_input_bwa_samse) - // ch_versions = ch_versions.mix( BWA_SAMSE_CIRCULARMAPPER.out.versions.first() ) - - // ch_input_realignsamfile = BWA_SAMSE_CIRCULARMAPPER.out.bam - // .combine(CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta, by: 0) - // .combine(ch_eval) - // .multiMap { - // ref_bam, bam, ref_fasta, fasta, ch_eval -> - // bam: [ ref_bam, bam ] - // fasta: [ ref_fasta, fasta ] - // eval: [ ch_eval ] - // } - - // CIRCULARMAPPER_REALIGNSAMFILE(ch_input_realignsamfile) - // ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) emit: - - bam = channel.empty() //CIRCULARMAPPER_REALIGNSAMFILE.out.bam // channel: [ val(meta), path(bam) ] - versions = ch_versions // channel: [ path(versions.yml) ] - + bam = ch_realigned_bams // channel: [ 
val(meta), path(bam) ] + versions = ch_versions + mqc = ch_multiqc_files } diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 1ffacf706..32291ffaa 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -117,6 +117,35 @@ workflow MAP { ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai } else if ( params.mapping_tool == 'circularmapper' ) { + ch_index_for_mapping = index.map{ meta, index, fasta -> [ meta, index ] } + ch_elongated_reference_for_mapping = elogated_index.map{ meta, index, fasta, circular_target -> [ meta, index ] } + ch_reads_for_mapping = reads + + CIRCULARMAPPER( ch_index_for_mapping, ch_elongated_reference_for_mapping, ch_reads_for_mapping ) + + // // Join the original and elongated references, then combine with the reads, and multiMap to ensure correct ordering of channel contents. + // ch_reads_for_circularmapper = reads.map { + // // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + // addNewMetaFromAttributes( it, "reference" , "reference" , false ) + // } + + // ch_input_for_circularmapper = index.join( elogated_index ) + // .map { + // meta, index, fasta, elongated_index, elongated_fasta, circular_target -> + // [ meta, index, fasta , elongated_index, elongated_fasta ] + // } + // .map { + // // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + // addNewMetaFromAttributes( it, "id" , "reference" , false ) + // } + // .combine( ch_reads_for_circularmapper, by: 0) + // .multiMap { + // ignore_me, meta, index, fasta, elongated_index, elongated_fasta, circular_target, meta2, fasta, reads -> + // reads: [ meta, reads ] + // reference: [ meta, index, fasta ] + // elongated_reference: [meta, elongated_index , elongated_index] + // } + // Reference elongation and indexing takes place in the reference_indexing swf. 
// Circularmapper takes non-elongated AND elongated references and reads as input (i think. wait for Alex's reply). From e05cc42459b2fb91d3e1809b9fbc0d2b91b98d03 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 15 Jul 2024 14:10:41 +0200 Subject: [PATCH 14/61] index output bams --- subworkflows/local/circularmapper.nf | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index 0ca399110..dde31729f 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -2,8 +2,9 @@ // Run circularmapper // -include { FASTQ_ALIGN_BWAALN_ELONGATED } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' -include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' +include { FASTQ_ALIGN_BWAALN as FASTQ_ALIGN_BWAALN_ELONGATED } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_REALIGNED } from '../../modules/nf-core/samtools/index/main' workflow CIRCULARMAPPER { take: @@ -16,6 +17,8 @@ workflow CIRCULARMAPPER { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() ch_realigned_bams = Channel.empty() + ch_realigned_bais = Channel.empty() + ch_realigned_csis = Channel.empty() // While mapping with BWA will need the elongated reference index, RealignSAMFile apparently does NOT need the elongated reference to be present, only the elongation factor. 
FASTQ_ALIGN_BWAALN_ELONGATED( ch_fastq_reads, ch_elongated_index ) @@ -33,10 +36,11 @@ workflow CIRCULARMAPPER { ch_input_for_realignsamfile = FASTQ_ALIGN_BWAALN_ELONGATED.out.bam .map{ - // create meta consistent with rest of workflow + // create meta consistent with rest of MAP workflow + // TODO: Check that the id_index is correctly set and remove the elongation factor suffix if necessary. meta, bam -> - new_meta = meta + [ reference: meta.id_index ] - [ new_meta, bam ] + new_meta = meta + [ reference: meta.id_index ] + [ new_meta, bam ] } .map { // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute @@ -44,7 +48,7 @@ workflow CIRCULARMAPPER { } .combine( ch_ref_for_realignsamfile, by: 0 ) .multiMap { - ignore_me, meta, bam, ref_meta, ref_index, ref_fasta -> + ignore_me, meta, bam, ref_meta, ref_fasta -> bam: [ metas, bam ] fasta: [ ref_meta, ref_fasta ] } @@ -53,9 +57,15 @@ workflow CIRCULARMAPPER { ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) ch_realigned_bams = ch_realigned_bams.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.bam ) + SAMTOOLS_INDEX_REALIGNED( ch_realigned_bams ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX_REALIGNED.out.versions.first() ) + ch_realigned_bais = ch_realigned_bais.mix( SAMTOOLS_INDEX_REALIGNED.out.bai ) + ch_realigned_csis = ch_realigned_csis.mix( SAMTOOLS_INDEX_REALIGNED.out.csi ) emit: - bam = ch_realigned_bams // channel: [ val(meta), path(bam) ] + bam = ch_realigned_bams // [ val(meta), path(bam) ] + bai = ch_realigned_bais // [ val(meta), path(bai) ] + csi = ch_realigned_csis // [ val(meta), path(csi) ] versions = ch_versions mqc = ch_multiqc_files } From 1764505d5e4f42b0d79f629645c5d3866a9117c3 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Tue, 16 Jul 2024 13:57:48 +0200 Subject: [PATCH 15/61] add validation and address comments --- subworkflows/local/reference_indexing.nf | 34 ++++++++++--------- .../local/reference_indexing_multi.nf | 2 +- 
.../local/reference_indexing_single.nf | 2 +- .../local/utils_nfcore_eager_pipeline/main.nf | 2 +- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 02a2b2621..ad9e18b05 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -2,12 +2,12 @@ // Prepare reference indexing for downstream // -include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf' -include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf' -include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf' -include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' -include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' -include { GUNZIP as GUNZIP_CM_FASTA } from '../../modules/nf-core/gunzip/main.nf' +include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf' +include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf' +include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main.nf' workflow REFERENCE_INDEXING { take: @@ -27,7 +27,7 @@ workflow REFERENCE_INDEXING { // If input (multi-)reference sheet supplied REFERENCE_INDEXING_MULTI ( fasta ) ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference - ch_circularmapper = REFERENCE_INDEXING_MULTI.out.circularmapper + ch_elongated_reference = REFERENCE_INDEXING_MULTI.out.elongated_reference ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap 
ch_pmd_masked_fasta = REFERENCE_INDEXING_MULTI.out.pmd_masked_fasta @@ -41,7 +41,7 @@ workflow REFERENCE_INDEXING { } else { // If input FASTA and/or indicies supplied REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) - ch_circularmapper = REFERENCE_INDEXING_SINGLE.out.circularmapper + ch_elongated_reference = REFERENCE_INDEXING_SINGLE.out.elongated_reference ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_SINGLE.out.pmd_masked_fasta @@ -128,30 +128,32 @@ workflow REFERENCE_INDEXING { ch_dbsnp = ch_dbsnp .filter { it[1] != "" } - ch_circularmapper_for_gunzip = ch_circularmapper - .filter{ it[1] != "" || it[2] != "" || it[3] != "" } + ch_elongated_for_gunzip = ch_elongated_reference + .filter{ it[1] != "" && it[2] != "" } + .ifEmpty{ if(params.mapping_tool == "circularmapper" ) { error "[nf-core/eager]: ERROR: Mapping with circularmapper requires either a circular target or elongated reference file." 
} } + .filter( it != null ) .branch{ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> forgunzip: circularmapper_elongated_fasta.extension == "gz" skip: true } - ch_circularmapper_input = ch_circularmapper_for_gunzip.gunzip + ch_elongated_input = ch_elongated_for_gunzip.gunzip .multiMap{ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> gunzip: [ meta, circularmapper_elongated_fasta ] remainder: [ meta, circular_target, circularmapper_elongated_fai ] } - GUNZIP_CM_FASTA( ch_circularmapper_input ) - ch_version = ch_versions.mix( GUNZIP_CM_FASTA.out.versions.first() ) + GUNZIP_ELONGATED_FASTA( ch_elongated_input.gunzip ) + ch_version = ch_versions.mix( GUNZIP_ELONGATED_FASTA.out.versions.first() ) - ch_gunzipped_elongated = GUNZIP_CM_FASTA.out.gunzip.join( ch_circularmapper_input.remainder, failOnMismatch: true ) - ch_circularmapper_gunzipped = ch_circularmapper_for_gunzip.skip.mix( ch_gunzipped_elongated ) + ch_elongated_gunzipped = GUNZIP_ELONGATED_FASTA.out.gunzip.join( ch_elongated_input.remainder, failOnMismatch: true ) + ch_elongated_after_gunzip = ch_elongated_for_gunzip.skip.mix( ch_elongated_gunzipped ) emit: reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] - circularmapper = ch_circularmapper_gunzipped // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elongated_reference = ch_elongated_after_gunzip // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] hapmap = ch_hapmap // [ meta, hapmap ] pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index f71280bc6..628d58a90 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ 
b/subworkflows/local/reference_indexing_multi.nf @@ -205,7 +205,7 @@ workflow REFERENCE_INDEXING_MULTI { emit: reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex ] - circularmapper = ch_input_from_referencesheet.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elongated_reference = ch_input_from_referencesheet.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mitochondrion_header = ch_input_from_referencesheet.mitochondrion_header // [ meta, mitochondrion ] hapmap = ch_input_from_referencesheet.angsd_hapmap // [ meta, hapmap ] pmd_masked_fasta = ch_input_from_referencesheet.pmd_masked_fasta // [ meta, pmd_masked_fasta ] diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index cddb45a63..f4c9e42cd 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -113,7 +113,7 @@ workflow REFERENCE_INDEXING_SINGLE { emit: reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex ] - circularmapper = ch_ref_index_single.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elongated_reference = ch_ref_index_single.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mitochondrion_header = ch_ref_index_single.mito_header // [ meta, mito_header ] hapmap = ch_ref_index_single.hapmap // [ meta, hapmap ] pmd_masked_fasta = ch_ref_index_single.pmd_masked_fasta // [ meta, pmd_masked_fasta ] diff --git a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf index 8809f7597..a7d75dd28 100644 --- a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf @@ -231,7 +231,7 @@ def 
validateInputParameters() { if ( params.genotyping_source == 'pmd' && ! params.run_pmd_filtering ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'pmd' unless PMD-filtering is ran.") } if ( params.genotyping_source == 'rescaled' && ! params.run_mapdamage_rescaling ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'rescaled' unless aDNA damage rescaling is ran.") } if ( params.fasta && params.run_genotyping && params.genotyping_tool == 'pileupcaller' && ! (params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile ) ) { exit 1, ("[nf-core/eager] ERROR: Genotyping with pileupcaller requires both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' to be provided.") } - + if ( params.fasta && params.mapping_tool == "circularmapper" && ! params.fasta_circular_target && ! params.mapping_circularmapper_elongated_fasta ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires either --fasta_circular_target or --mapping_circularmapper_elongated_fasta. 
") } } // From dff6208faa892b02baaf6050d4cebedb65b84578 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 11:18:34 +0200 Subject: [PATCH 16/61] update CM modules --- modules.json | 4 ++-- .../circularmapper/circulargenerator/main.nf | 19 ++++++++++----- .../circularmapper/circulargenerator/meta.yml | 23 ++++++++++++++++--- .../circularmapper/realignsamfile/main.nf | 2 +- .../circularmapper/realignsamfile/meta.yml | 5 ++++ 5 files changed, 41 insertions(+), 12 deletions(-) diff --git a/modules.json b/modules.json index b447553e5..ed96492a2 100644 --- a/modules.json +++ b/modules.json @@ -97,12 +97,12 @@ }, "circularmapper/circulargenerator": { "branch": "master", - "git_sha": "5890d9e73aaa803fc6be94b1822539b4204d8cff", + "git_sha": "0148d00e72e35cd08b3d829d7de3430bc0c92a5a", "installed_by": ["modules"] }, "circularmapper/realignsamfile": { "branch": "master", - "git_sha": "5890d9e73aaa803fc6be94b1822539b4204d8cff", + "git_sha": "579d2d5f15e126a2190a7b709dfc77696c83688d", "installed_by": ["modules"] }, "damageprofiler": { diff --git a/modules/nf-core/circularmapper/circulargenerator/main.nf b/modules/nf-core/circularmapper/circulargenerator/main.nf index b1664d032..07b722c39 100644 --- a/modules/nf-core/circularmapper/circulargenerator/main.nf +++ b/modules/nf-core/circularmapper/circulargenerator/main.nf @@ -1,5 +1,5 @@ // This module does the following: -//creating a modified reference genome, with an elongation of the an specified amount of bases +//creating a modified reference genome, with an elongation_factoration of the an specified amount of bases process CIRCULARMAPPER_CIRCULARGENERATOR { tag "$meta.id" @@ -12,10 +12,11 @@ process CIRCULARMAPPER_CIRCULARGENERATOR { input: tuple val(meta), path(reference) - val(elong) + tuple val(meta2), val(elongation_factor) + tuple val(meta3), val(target) output: - tuple val(meta), path("*_${elong}.fasta"), emit: fasta + tuple val(meta), path("*_${elongation_factor}.fasta"), emit: fasta path 
"versions.yml" , emit: versions when: @@ -25,11 +26,17 @@ process CIRCULARMAPPER_CIRCULARGENERATOR { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - circulargenerator -e ${elong} \ + circulargenerator \ + -e ${elongation_factor} \ -i ${reference} \ - -s ${prefix} \ + -s ${target} \ $args + ## circulargenerator has a hardcoded output name. Rename if necessary to use prefix. + if [[ "${reference.getBaseName()}_${elongation_factor}.fasta" != "${prefix}_${elongation_factor}.fasta" ]]; then + mv ${reference.getBaseName()}_${elongation_factor}.fasta ${prefix}_${elongation_factor}.fasta + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": circulargenerator: \$(circulargenerator -h | grep 'usage' | sed 's/usage: CircularGenerator//') @@ -40,7 +47,7 @@ process CIRCULARMAPPER_CIRCULARGENERATOR { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_${elong}.fasta + touch ${prefix}_${elongation_factor}.fasta cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/circularmapper/circulargenerator/meta.yml b/modules/nf-core/circularmapper/circulargenerator/meta.yml index 3e6a51ada..2704fbc36 100644 --- a/modules/nf-core/circularmapper/circulargenerator/meta.yml +++ b/modules/nf-core/circularmapper/circulargenerator/meta.yml @@ -19,18 +19,34 @@ input: - meta: type: map description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` + Groovy Map containing reference information + e.g. `[ id:'sample1' ]` - reference: type: file description: Genome fasta file pattern: "*.fasta" - - elong: + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
`[ id:'sample1' ]` + + - elongation_factor: type: integer description: The number of bases that the ends of the target chromosome in the reference genome should be elongated by + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'sample1' ]` + + - target: + type: string + description: The name of the chromosome in the reference genome that should be elongated + output: #Only when we have meta - meta: @@ -51,5 +67,6 @@ output: authors: - "@apalleja" + - "@TCLamnidis" maintainers: - "" diff --git a/modules/nf-core/circularmapper/realignsamfile/main.nf b/modules/nf-core/circularmapper/realignsamfile/main.nf index 579815df0..9d74f7b91 100644 --- a/modules/nf-core/circularmapper/realignsamfile/main.nf +++ b/modules/nf-core/circularmapper/realignsamfile/main.nf @@ -10,7 +10,7 @@ process CIRCULARMAPPER_REALIGNSAMFILE { input: tuple val(meta), path(bam) tuple val(meta2), path(fasta) - val(elongation_factor) + tuple val(meta3), val(elongation_factor) output: tuple val(meta), path("*_realigned.bam") , emit: bam diff --git a/modules/nf-core/circularmapper/realignsamfile/meta.yml b/modules/nf-core/circularmapper/realignsamfile/meta.yml index bc4173754..fbb62d76d 100644 --- a/modules/nf-core/circularmapper/realignsamfile/meta.yml +++ b/modules/nf-core/circularmapper/realignsamfile/meta.yml @@ -35,6 +35,11 @@ input: - fasta: type: file description: Input elongated genome fasta + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` - elongation_factor: type: integer description: The elongation factor used when running circulargenerator, i.e. 
the number of bases that the ends of the target chromosome in the reference genome was elongated by From c1400620286a2d5a6346d09007a4f67e901ed8ba Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 11:27:15 +0200 Subject: [PATCH 17/61] updates to reference elongation SWF --- subworkflows/local/elongate_reference.nf | 55 ++++++++++++++---------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 8c949ba4d..b0936f3a2 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -7,48 +7,57 @@ include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/ workflow ELONGATE_REFERENCE { take: - ch_reference // [ meta, fasta, fai ] - ch_elongated_reference // [ meta, elongated_fasta, elongated_fai ] - elongation_factor // [ int ] - // TODO CIRCULARMAPPER_CIRCULARGENERATOR module needs updating. `-s` option is the circular target and not the output file >.< + ch_reference // [ meta, fasta, fai, dict, mapindex ] + ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_circular_reference = Channel.empty() /* - Check what fasta files we have: + Check what fasta files we have. There are four options: - 1. Elongated reference with index (ignore circular target) - 2. Elongated reference without index (ignore circular target) - 3. No elongated reference, but circular target - 4. None of the above -> Throw error (should go in parameter validation) + 1. Elongated reference with index (ignore circular target) -> Pass through + 2. Elongated reference without index (ignore circular target) -> Index and emit + 3. No elongated reference, but circular target -> Elongate, index and emit. + 4. 
None of the above -> Throw error and stop execution during parameter validation */ + ch_circulargenerator_input = ch_elongated_reference .branch{ - meta, elongated_fasta_index, elongated_fasta, circular_target -> - ready: elongated_fasta != "" && elongated_fasta_index != "" - needs_index: elongated_fasta != "" && elongated_fasta_index == "" - needs_elongation: elongated_fasta == "" && circular_target != "" + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + ready: circularmapper_elongated_fasta != "" && circularmapper_elongated_fai != "" + needs_index: circularmapper_elongated_fasta != "" && circularmapper_elongated_fai == "" + needs_elongation: circularmapper_elongated_fasta == "" && circular_target != "" } // Elongate references that need it // Join the original references to the branch of needs_elongation, to get the original fasta files, and elongate them. ch_references_to_elongate = ch_circulargenerator_input.needs_elongation .join( ch_reference ) - .map { - meta, elongated_fasta_index, elongated_fasta, circular_target, meta2, index, fasta -> - [ meta, fasta ] + .multiMap { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, fasta, fai, dict, mapindex -> + + def elongation_factor = params.mapping_circularmapper_elongation_factor + + fasta: [ meta, fasta ] + elongation_factor : [ meta, elongation_factor ] + target: [ meta, circular_target ] } - CIRCULARMAPPER_CIRCULARGENERATOR(ch_circulargenerator_input.needs_elongation, elongation_value) + CIRCULARMAPPER_CIRCULARGENERATOR( + ch_circulargenerator_input.needs_elongation.fasta, + ch_circulargenerator_input.needs_elongation.elongation_factor, + ch_circulargenerator_input.needs_elongation.target + ) ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) // Collect newly generated circular references and provided ones without an index, and index them. 
ch_input_for_circular_indexing = ch_circulargenerator_input.needs_index .map { - meta, elongated_fasta_index, elongated_fasta, circular_target -> - [ meta, elongated_fasta ] + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + [ meta, circularmapper_elongated_fasta ] } .mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta ) @@ -61,8 +70,8 @@ workflow ELONGATE_REFERENCE { // Then put all the indexed elongated references together and emit them ch_circular_reference = ch_circulargenerator_input.ready .map { - meta, elongated_fasta_index, elongated_fasta, circular_target -> - [ meta, elongated_fasta, elongated_fasta_index ] + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + [ meta, circularmapper_elongated_fasta, circularmapper_elongated_fai ] } .mix( ch_indexed_references ) From 4cd7a190be8bfdb29125b708ae3428479f348d09 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 12:56:13 +0200 Subject: [PATCH 18/61] Add unzipping in ELONGATE_REFERENCE SWF --- subworkflows/local/elongate_reference.nf | 34 +++++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index b0936f3a2..a62691140 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -2,12 +2,12 @@ // Elongate a reference genome by circularising the target sequence by a given elongation factor. 
// -include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' +include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main' +include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' workflow ELONGATE_REFERENCE { take: - ch_reference // [ meta, fasta, fai, dict, mapindex ] ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] main: @@ -15,6 +15,32 @@ workflow ELONGATE_REFERENCE { ch_multiqc_files = Channel.empty() ch_circular_reference = Channel.empty() + // Check if the elongated reference is gzipped, and if so, unzip it. + ch_elongated_branches = ch_elongated_reference + .branch { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + + for_gunzip: circularmapper_elongated_fasta.extension == "gz" + skip_gunzip: true + } + + ch_elongated_for_gunzip = ch_elongated_branches.for_gunzip + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + [ meta, circularmapper_elongated_fasta ] + } + + GUNZIP_ELONGATED_FASTA( ch_elongated_for_gunzip.for_gunzip ) + ch_versions = ch_versions.mix( GUNZIP_ELONGATED_FASTA.out.versions.first() ) + + ch_elongated_unzipped_reference = ch_elongated_branches.for_gunzip + .join( GUNZIP_ELONGATED_FASTA.out.gunzip ) + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, unzipped_fasta -> + [ meta, circular_target, unzipped_fasta, circularmapper_elongated_fai ] + } + .mix( ch_elongated_branches.skip_gunzip ) + /* Check what fasta files we have. There are four options: @@ -24,7 +50,7 @@ workflow ELONGATE_REFERENCE { 4. 
None of the above -> Throw error and stop execution during parameter validation */ - ch_circulargenerator_input = ch_elongated_reference + ch_circulargenerator_input = ch_elongated_unzipped_reference .branch{ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> ready: circularmapper_elongated_fasta != "" && circularmapper_elongated_fai != "" @@ -76,7 +102,7 @@ workflow ELONGATE_REFERENCE { .mix( ch_indexed_references ) emit: - circular_reference = ch_circular_reference // [ meta, fasta, fai ] + circular_reference = ch_circular_reference // [ meta, circular_target, fasta, fai ] versions = ch_versions mqc = ch_multiqc_files From de747de76aaf29b08110f66c6d333c191d08153a Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 12:59:15 +0200 Subject: [PATCH 19/61] unzip, elongate and index reference when CM is used --- subworkflows/local/reference_indexing.nf | 27 ++++++++---------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index ad9e18b05..23523f025 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -8,6 +8,8 @@ include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/ include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main.nf' +include { ELONGATE_REFERENCE } from '../../subworkflows/local/elongate_reference' + workflow REFERENCE_INDEXING { take: @@ -132,28 +134,17 @@ workflow REFERENCE_INDEXING { .filter{ it[1] != "" && it[2] != "" } .ifEmpty{ if(params.mapping_tool == "circularmapper" ) { error "[nf-core/eager]: ERROR: Mapping with circularmapper requires either a circular target or elongated reference file." 
} } .filter( it != null ) - .branch{ - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> - forgunzip: circularmapper_elongated_fasta.extension == "gz" - skip: true - } - - ch_elongated_input = ch_elongated_for_gunzip.gunzip - .multiMap{ - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> - gunzip: [ meta, circularmapper_elongated_fasta ] - remainder: [ meta, circular_target, circularmapper_elongated_fai ] - } - GUNZIP_ELONGATED_FASTA( ch_elongated_input.gunzip ) - ch_version = ch_versions.mix( GUNZIP_ELONGATED_FASTA.out.versions.first() ) - - ch_elongated_gunzipped = GUNZIP_ELONGATED_FASTA.out.gunzip.join( ch_elongated_input.remainder, failOnMismatch: true ) - ch_elongated_after_gunzip = ch_elongated_for_gunzip.skip.mix( ch_elongated_gunzipped ) + if ( params.mapping_tool == "circularmapper" ) { + // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. + ELONGATE_REFERENCE( ch_input_from_referencesheet.circularmapper ) + ch_version = ch_versions.mix( ELONGATE_REFERENCE.out.versions ) + ch_elongated_reference = ELONGATE_REFERENCE.out.circular_reference + } emit: reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] - elongated_reference = ch_elongated_after_gunzip // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elongated_reference = ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] hapmap = ch_hapmap // [ meta, hapmap ] pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] From ff2ad9ada9e2d21aca1e3d84063310c4640b2bf4 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 13:31:45 +0200 Subject: [PATCH 20/61] add original reference input --- 
subworkflows/local/elongate_reference.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index a62691140..1d29c2241 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -8,6 +8,7 @@ include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/ workflow ELONGATE_REFERENCE { take: + ch_reference // [ meta, fasta, fai, dict, mapindex ] ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] main: @@ -105,5 +106,4 @@ workflow ELONGATE_REFERENCE { circular_reference = ch_circular_reference // [ meta, circular_target, fasta, fai ] versions = ch_versions mqc = ch_multiqc_files - } From b48e80f17abbe153bb9d754db432032732038854 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 13:34:16 +0200 Subject: [PATCH 21/61] i broke something Q_Q --- subworkflows/local/reference_indexing.nf | 44 +++++++++++++----------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 23523f025..0e84a3858 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -7,9 +7,7 @@ include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/refe include { GUNZIP as GUNZIP_PMDBED } from '../../modules/nf-core/gunzip/main.nf' include { GUNZIP as GUNZIP_PMDFASTA } from '../../modules/nf-core/gunzip/main.nf' include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' -include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main.nf' -include { ELONGATE_REFERENCE } from '../../subworkflows/local/elongate_reference' - +include { ELONGATE_REFERENCE } from '../../subworkflows/local/elongate_reference.nf' workflow REFERENCE_INDEXING { take: @@ -130,30 +128,34 @@ 
workflow REFERENCE_INDEXING { ch_dbsnp = ch_dbsnp .filter { it[1] != "" } - ch_elongated_for_gunzip = ch_elongated_reference - .filter{ it[1] != "" && it[2] != "" } - .ifEmpty{ if(params.mapping_tool == "circularmapper" ) { error "[nf-core/eager]: ERROR: Mapping with circularmapper requires either a circular target or elongated reference file." } } - .filter( it != null ) - + // Elongate reference for circularmapper if requested if ( params.mapping_tool == "circularmapper" ) { + // Throw errors if required parameters are missing + ch_elongated_for_gunzip = ch_elongated_reference + .filter{ it[1] != "" && it[2] != "" } + .ifEmpty{ error "[nf-core/eager]: ERROR: Mapping with circularmapper requires either a circular target or elongated reference file for at least one reference." } + .filter( it != null ) // Remove null channel which arises if empty cause error returns null. + // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. 
- ELONGATE_REFERENCE( ch_input_from_referencesheet.circularmapper ) + ELONGATE_REFERENCE( ch_input_from_referencesheet.circularmapper, ch_reference_for_mapping ) ch_version = ch_versions.mix( ELONGATE_REFERENCE.out.versions ) - ch_elongated_reference = ELONGATE_REFERENCE.out.circular_reference + ch_elongated_indexed_reference = ELONGATE_REFERENCE.out.circular_reference + } else { + ch_elongated_indexed_reference = ch_elongated_reference } emit: - reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] - elongated_reference = ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] - mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] - hapmap = ch_hapmap // [ meta, hapmap ] - pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] - pmd_bed_for_masking = ch_pmd_bed_for_masking // [ meta, pmd_bed_for_masking ] - snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] - pileupcaller_bed_snp = ch_pileupcaller_bed_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] - sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] - bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] - dbsnp = ch_dbsnp // [ meta, dbsnp ] + reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] + elongated_reference = ch_elongated_indexed_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] + hapmap = ch_hapmap // [ meta, hapmap ] + pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] + pmd_bed_for_masking = ch_pmd_bed_for_masking // [ meta, pmd_bed_for_masking ] + snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] + pileupcaller_bed_snp = ch_pileupcaller_bed_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] + sexdeterrmine_bed = 
ch_sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] + dbsnp = ch_dbsnp // [ meta, dbsnp ] versions = ch_versions } From 7e311f4c887e73ee668224b30756e83b51522f37 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 14:08:42 +0200 Subject: [PATCH 22/61] bracket swap --- subworkflows/local/reference_indexing.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 0e84a3858..577b5943e 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -134,7 +134,7 @@ workflow REFERENCE_INDEXING { ch_elongated_for_gunzip = ch_elongated_reference .filter{ it[1] != "" && it[2] != "" } .ifEmpty{ error "[nf-core/eager]: ERROR: Mapping with circularmapper requires either a circular target or elongated reference file for at least one reference." } - .filter( it != null ) // Remove null channel which arises if empty cause error returns null. + .filter{ it != null } // Remove null channel which arises if empty cause error returns null. // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. ELONGATE_REFERENCE( ch_input_from_referencesheet.circularmapper, ch_reference_for_mapping ) From 159a48a5c8af1284da53c02f9e7f84e76750abea Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 15:43:08 +0200 Subject: [PATCH 23/61] fix syntax errors. 
--- subworkflows/local/reference_indexing.nf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 577b5943e..7cd9512ab 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -21,7 +21,7 @@ workflow REFERENCE_INDEXING { // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( fasta_fai || fasta_dict || fasta_mapperindexdir )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as e.g. `--fasta_fai`. --fasta_sheet CSV/TSV takes priority and --fasta_* parameters will be ignored.") - if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile || params.sexdeterrmine_bedfile || params.mapstats_bedtools_featurefile || params.genotyping_reference_ploidy || params.genotyping_gatk_dbsnp, params.fasta_circular_target, params.circularmapper_elongated_fasta, params.circularmapper_elongated_fai )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. 
Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.") + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && ( params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile || params.sexdeterrmine_bedfile || params.mapstats_bedtools_featurefile || params.genotyping_reference_ploidy || params.genotyping_gatk_dbsnp || params.fasta_circular_target || params.circularmapper_elongated_fasta || params.circularmapper_elongated_fai )) log.warn("A TSV or CSV has been supplied to `--fasta_sheet` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta_sheet CSV/TSV take priority and other input parameters will be ignored.") if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { // If input (multi-)reference sheet supplied @@ -132,12 +132,11 @@ workflow REFERENCE_INDEXING { if ( params.mapping_tool == "circularmapper" ) { // Throw errors if required parameters are missing ch_elongated_for_gunzip = ch_elongated_reference - .filter{ it[1] != "" && it[2] != "" } + .filter{ it[1] != "" || it[2] != "" } .ifEmpty{ error "[nf-core/eager]: ERROR: Mapping with circularmapper requires either a circular target or elongated reference file for at least one reference." } - .filter{ it != null } // Remove null channel which arises if empty cause error returns null. // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. 
- ELONGATE_REFERENCE( ch_input_from_referencesheet.circularmapper, ch_reference_for_mapping ) + ELONGATE_REFERENCE( ch_reference_for_mapping, ch_elongated_reference ) ch_version = ch_versions.mix( ELONGATE_REFERENCE.out.versions ) ch_elongated_indexed_reference = ELONGATE_REFERENCE.out.circular_reference } else { From 61c5a884006303e143794fdd632a00f9b55bf2f4 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 16:41:35 +0200 Subject: [PATCH 24/61] fix cardinality in reference maps --- workflows/eager.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index c8964cec8..2291ad131 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -181,11 +181,11 @@ workflow EAGER { // ch_reference_for_mapping = REFERENCE_INDEXING.out.reference .map{ - meta, fasta, fai, dict, index, circular_target -> + meta, fasta, fai, dict, index -> [ meta, index, fasta ] } - MAP ( ch_reads_for_mapping, ch_reference_for_mapping ) + MAP ( ch_reads_for_mapping, ch_reference_for_mapping, REFERENCE_INDEXING.out.elongated_reference ) ch_versions = ch_versions.mix( MAP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( MAP.out.mqc.collect{it[1]}.ifEmpty([]) ) @@ -250,7 +250,7 @@ workflow EAGER { ch_fasta_for_deduplication = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target -> + meta, fasta, fai, dict, index -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -503,7 +503,7 @@ workflow EAGER { ch_fasta_for_damagecalculation = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target -> + meta, fasta, fai, dict, index -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -570,7 +570,7 @@ workflow EAGER { ch_reference_for_genotyping = REFERENCE_INDEXING.out.reference // Remove unnecessary files from the reference channel, so SWF doesn't break with each change to reference channel. 
.map { - meta, fasta, fai, dict, mapindex, circular_target -> + meta, fasta, fai, dict, mapindex -> [ meta, fasta, fai, dict ] } GENOTYPE( From 9054dd7db70faf8d79cb1fbd0662229bdacfcc9e Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 16:42:05 +0200 Subject: [PATCH 25/61] add third input channel, and dummy output for CM for testing. --- subworkflows/local/map.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 32291ffaa..c1fe076ab 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -18,7 +18,7 @@ workflow MAP { take: reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] index // [ [meta], [ index ], [ fasta ] ] - elogated_index // [ [meta], [ index ], [ fasta ], [ circular_target ] ] + elogated_index // [ [meta], circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] main: ch_versions = Channel.empty() @@ -117,11 +117,11 @@ workflow MAP { ch_mapped_lane_bai = params.fasta_largeref ? 
SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai } else if ( params.mapping_tool == 'circularmapper' ) { - ch_index_for_mapping = index.map{ meta, index, fasta -> [ meta, index ] } - ch_elongated_reference_for_mapping = elogated_index.map{ meta, index, fasta, circular_target -> [ meta, index ] } - ch_reads_for_mapping = reads + // ch_index_for_mapping = index.map{ meta, index, fasta -> [ meta, index ] } + // ch_elongated_reference_for_mapping = elogated_index.map{ meta, circular_target, elongated_fasta, elongated_index -> [ meta, elongated_index ] } + // ch_reads_for_mapping = reads - CIRCULARMAPPER( ch_index_for_mapping, ch_elongated_reference_for_mapping, ch_reads_for_mapping ) + // CIRCULARMAPPER( ch_index_for_mapping, ch_elongated_reference_for_mapping, ch_reads_for_mapping ) // // Join the original and elongated references, then combine with the reads, and multiMap to ensure correct ordering of channel contents. // ch_reads_for_circularmapper = reads.map { @@ -160,8 +160,8 @@ workflow MAP { // CIRCULARMAPPER( ch_input_for_circularmapper.reads, params.elongation_factor, ch_input_for_circularmapper.reference ) // ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) // // TODO - Update SWF outputs - // ch_mapped_lane_bam = CIRCULARMAPPER.out.bam - // ch_mapped_lane_bai = Channel.empty() // Circularmapper doesn't give a bai + ch_mapped_lane_bam = Channel.empty() //CIRCULARMAPPER.out.bam + ch_mapped_lane_bai = Channel.empty() // Circularmapper doesn't give a bai } From d656966327cc7e52209d1b3d4ed6d3829016e630 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 19 Jul 2024 16:42:58 +0200 Subject: [PATCH 26/61] WIP --- subworkflows/local/elongate_reference.nf | 30 ++++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 1d29c2241..86902f508 100644 --- a/subworkflows/local/elongate_reference.nf +++ 
b/subworkflows/local/elongate_reference.nf @@ -5,6 +5,8 @@ include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main' include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' +// TODO Check that the unzipping correctly overwrites the zipped fasta file, and that the emitted channel is constructed correctly. +// TODO Currently, nothing seems to get dumped in the emission channel, so some join must be off. workflow ELONGATE_REFERENCE { take: @@ -15,13 +17,13 @@ workflow ELONGATE_REFERENCE { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() ch_circular_reference = Channel.empty() + ch_elongated_unzipped = Channel.empty() - // Check if the elongated reference is gzipped, and if so, unzip it. + // Check if the provided elongated reference is gzipped, and if so, unzip it. ch_elongated_branches = ch_elongated_reference .branch { meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> - - for_gunzip: circularmapper_elongated_fasta.extension == "gz" + for_gunzip: circularmapper_elongated_fasta != '' && circularmapper_elongated_fasta.extension == "gz" skip_gunzip: true } @@ -31,16 +33,17 @@ workflow ELONGATE_REFERENCE { [ meta, circularmapper_elongated_fasta ] } - GUNZIP_ELONGATED_FASTA( ch_elongated_for_gunzip.for_gunzip ) + GUNZIP_ELONGATED_FASTA( ch_elongated_for_gunzip ) ch_versions = ch_versions.mix( GUNZIP_ELONGATED_FASTA.out.versions.first() ) - ch_elongated_unzipped_reference = ch_elongated_branches.for_gunzip + ch_elongated_unzipped = ch_elongated_reference .join( GUNZIP_ELONGATED_FASTA.out.gunzip ) + .dump(tag: 'unzipped_fasta') .map { meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, unzipped_fasta -> + def final_fasta = unzipped_fasta ?: circularmapper_elongated_fasta [ meta, circular_target, unzipped_fasta, 
circularmapper_elongated_fai ] } - .mix( ch_elongated_branches.skip_gunzip ) /* Check what fasta files we have. @@ -51,7 +54,7 @@ workflow ELONGATE_REFERENCE { 4. None of the above -> Throw error and stop execution during parameter validation */ - ch_circulargenerator_input = ch_elongated_unzipped_reference + ch_circulargenerator_input = ch_elongated_unzipped .branch{ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> ready: circularmapper_elongated_fasta != "" && circularmapper_elongated_fai != "" @@ -74,9 +77,9 @@ workflow ELONGATE_REFERENCE { } CIRCULARMAPPER_CIRCULARGENERATOR( - ch_circulargenerator_input.needs_elongation.fasta, - ch_circulargenerator_input.needs_elongation.elongation_factor, - ch_circulargenerator_input.needs_elongation.target + ch_references_to_elongate.fasta, + ch_references_to_elongate.elongation_factor, + ch_references_to_elongate.target ) ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) @@ -94,13 +97,14 @@ workflow ELONGATE_REFERENCE { ch_indexed_references = ch_input_for_circular_indexing .join( BWA_INDEX_CIRCULARISED.out.index ) - // Then put all the indexed elongated references together and emit them - ch_circular_reference = ch_circulargenerator_input.ready + // Then put all the indexed elongated references together, replace any zipped ones with the unzipped version, and emit them + ch_circular_reference = ch_circulargenerator_input.ready.dump(tag:"ready", pretty:true) .map { meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> [ meta, circularmapper_elongated_fasta, circularmapper_elongated_fai ] } - .mix( ch_indexed_references ) + .mix( ch_indexed_references.dump(tag:"indexed", pretty: true) ) + .dump(tag: 'circular_reference') emit: circular_reference = ch_circular_reference // [ meta, circular_target, fasta, fai ] From 073b8fd1511a61c613ea6c78bc1a571156281523 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: 
Mon, 22 Jul 2024 17:04:41 +0200 Subject: [PATCH 27/61] module updates on elongate_reference modules --- conf/modules.config | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c4f01939b..2d520b2c4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -278,6 +278,34 @@ process { ] } + // Reference elongation and indexing for circular mapping + withName: GUNZIP_ELONGATED_FASTA { + publishDir = [ + path: { "${params.outdir}/reference/${meta.id}_${params.mapping_circularmapper_elongation_factor}/" }, + mode: params.publish_dir_mode, + pattern: '*[0-9].f*', + enabled: params.save_reference + ] + } + + withName: CIRCULARMAPPER_CIRCULARGENERATOR { + tag = { "${meta.id}_${params.mapping_circularmapper_elongation_factor}" } + publishDir = [ + path: { "${params.outdir}/mapping/circularmapper/" }, + mode: params.publish_dir_mode, + pattern: '*[0-9].fasta' + ] + } + + withName: BWA_INDEX_CIRCULARISED { + publishDir = [ + path: { "${params.outdir}/reference/${meta.id}_${params.mapping_circularmapper_elongation_factor}/" }, + mode: params.publish_dir_mode, + pattern: 'bwa', + enabled: params.save_reference + ] + } + // // BAM INPUT // @@ -529,16 +557,6 @@ process { ] } - withName: CIRCULARMAPPER_CIRCULARGENERATOR { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } - publishDir = [ - path: { "${params.outdir}/mapping/circularmapper/" }, - mode: params.publish_dir_mode, - pattern: '*[0-9].fasta' - ] - } - withName: CIRCULARMAPPER_REALIGNSAMFILE { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } From bce305a1e685d82a8eab937fe53442455fc2cf71 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 22 Jul 2024 17:04:59 +0200 Subject: [PATCH 28/61] actually run when the reference 
is not zipped --- subworkflows/local/elongate_reference.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 86902f508..23f71b33d 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -38,12 +38,13 @@ workflow ELONGATE_REFERENCE { ch_elongated_unzipped = ch_elongated_reference .join( GUNZIP_ELONGATED_FASTA.out.gunzip ) - .dump(tag: 'unzipped_fasta') + .dump(tag: 'unzipped_fasta', pretty: true) .map { meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, unzipped_fasta -> def final_fasta = unzipped_fasta ?: circularmapper_elongated_fasta [ meta, circular_target, unzipped_fasta, circularmapper_elongated_fai ] } + .mix( ch_elongated_branches.skip_gunzip ) /* Check what fasta files we have. From 20f8ccfd57ac6e1433b6c9c811f00bb38a0d3d07 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 25 Jul 2024 12:44:39 +0200 Subject: [PATCH 29/61] correct cardinality of circular reference channel --- subworkflows/local/elongate_reference.nf | 33 +++++++++++------------- subworkflows/local/map.nf | 16 +++++++----- subworkflows/local/reference_indexing.nf | 2 +- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 23f71b33d..d0f37cd19 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -6,12 +6,11 @@ include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunz include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' // TODO Check that the unzipping correctly overwrites the zipped fasta file, and that the emitted channel is constructed correctly. 
-// TODO Currently, nothing seems to get dumped in the emission channel, so some join must be off. workflow ELONGATE_REFERENCE { take: ch_reference // [ meta, fasta, fai, dict, mapindex ] - ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + ch_elongated_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index ] main: ch_versions = Channel.empty() @@ -22,14 +21,14 @@ workflow ELONGATE_REFERENCE { // Check if the provided elongated reference is gzipped, and if so, unzip it. ch_elongated_branches = ch_elongated_reference .branch { - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> for_gunzip: circularmapper_elongated_fasta != '' && circularmapper_elongated_fasta.extension == "gz" skip_gunzip: true } ch_elongated_for_gunzip = ch_elongated_branches.for_gunzip .map { - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> [ meta, circularmapper_elongated_fasta ] } @@ -38,11 +37,10 @@ workflow ELONGATE_REFERENCE { ch_elongated_unzipped = ch_elongated_reference .join( GUNZIP_ELONGATED_FASTA.out.gunzip ) - .dump(tag: 'unzipped_fasta', pretty: true) .map { - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, unzipped_fasta -> + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index, unzipped_fasta -> def final_fasta = unzipped_fasta ?: circularmapper_elongated_fasta - [ meta, circular_target, unzipped_fasta, circularmapper_elongated_fai ] + [ meta, circular_target, unzipped_fasta, circularmapper_elongated_index ] } .mix( ch_elongated_branches.skip_gunzip ) @@ -57,9 +55,9 @@ workflow ELONGATE_REFERENCE { ch_circulargenerator_input = ch_elongated_unzipped 
.branch{ - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> - ready: circularmapper_elongated_fasta != "" && circularmapper_elongated_fai != "" - needs_index: circularmapper_elongated_fasta != "" && circularmapper_elongated_fai == "" + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> + ready: circularmapper_elongated_fasta != "" && circularmapper_elongated_index != "" + needs_index: circularmapper_elongated_fasta != "" && circularmapper_elongated_index == "" needs_elongation: circularmapper_elongated_fasta == "" && circular_target != "" } @@ -68,7 +66,7 @@ workflow ELONGATE_REFERENCE { ch_references_to_elongate = ch_circulargenerator_input.needs_elongation .join( ch_reference ) .multiMap { - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, fasta, fai, dict, mapindex -> + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index, fasta, fai, dict, mapindex -> def elongation_factor = params.mapping_circularmapper_elongation_factor @@ -87,7 +85,7 @@ workflow ELONGATE_REFERENCE { // Collect newly generated circular references and provided ones without an index, and index them. 
ch_input_for_circular_indexing = ch_circulargenerator_input.needs_index .map { - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> [ meta, circularmapper_elongated_fasta ] } .mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.fasta ) @@ -99,16 +97,15 @@ workflow ELONGATE_REFERENCE { .join( BWA_INDEX_CIRCULARISED.out.index ) // Then put all the indexed elongated references together, replace any zipped ones with the unzipped version, and emit them - ch_circular_reference = ch_circulargenerator_input.ready.dump(tag:"ready", pretty:true) + ch_circular_reference = ch_circulargenerator_input.ready .map { - meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai -> - [ meta, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index -> + [ meta, circularmapper_elongated_fasta, circularmapper_elongated_index ] } - .mix( ch_indexed_references.dump(tag:"indexed", pretty: true) ) - .dump(tag: 'circular_reference') + .mix( ch_indexed_references ) emit: - circular_reference = ch_circular_reference // [ meta, circular_target, fasta, fai ] + circular_reference = ch_circular_reference // [ meta, fasta, index ] versions = ch_versions mqc = ch_multiqc_files } diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index c1fe076ab..3ba52ef86 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -18,7 +18,7 @@ workflow MAP { take: reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] index // [ [meta], [ index ], [ fasta ] ] - elogated_index // [ [meta], circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elogated_index // [ [meta], circularmapper_elongated_fasta, circularmapper_elongated_index ] main: ch_versions = Channel.empty() @@ -117,11 +117,16 @@ workflow MAP { 
ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai } else if ( params.mapping_tool == 'circularmapper' ) { - // ch_index_for_mapping = index.map{ meta, index, fasta -> [ meta, index ] } - // ch_elongated_reference_for_mapping = elogated_index.map{ meta, circular_target, elongated_fasta, elongated_index -> [ meta, elongated_index ] } - // ch_reads_for_mapping = reads + ch_index_for_mapping = index + ch_elongated_reference_for_mapping = elogated_index.map{ meta, elongated_fasta, elongated_index -> [ meta, elongated_index ] } + ch_reads_for_mapping = reads - // CIRCULARMAPPER( ch_index_for_mapping, ch_elongated_reference_for_mapping, ch_reads_for_mapping ) + CIRCULARMAPPER( + ch_index_for_mapping, + ch_elongated_reference_for_mapping, + ch_reads_for_mapping, + params.mapping_circularmapper_elongation_factor + ) // // Join the original and elongated references, then combine with the reads, and multiMap to ensure correct ordering of channel contents. 
// ch_reads_for_circularmapper = reads.map { @@ -163,7 +168,6 @@ workflow MAP { ch_mapped_lane_bam = Channel.empty() //CIRCULARMAPPER.out.bam ch_mapped_lane_bai = Channel.empty() // Circularmapper doesn't give a bai - } diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 7cd9512ab..2fa931938 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -145,7 +145,7 @@ workflow REFERENCE_INDEXING { emit: reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] - elongated_reference = ch_elongated_indexed_reference // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elongated_reference = ch_elongated_indexed_reference // [ meta, circularmapper_elongated_fasta, circularmapper_elongated_index ] mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] hapmap = ch_hapmap // [ meta, hapmap ] pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] From c73e14fd04a4124cee90c2b1a769e503ea277b85 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 25 Jul 2024 13:07:52 +0200 Subject: [PATCH 30/61] output elongated reference in reference dirs when requested --- conf/modules.config | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2d520b2c4..6b131cfbd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -291,9 +291,10 @@ process { withName: CIRCULARMAPPER_CIRCULARGENERATOR { tag = { "${meta.id}_${params.mapping_circularmapper_elongation_factor}" } publishDir = [ - path: { "${params.outdir}/mapping/circularmapper/" }, + path: { "${params.outdir}/reference/${meta.id}_${params.mapping_circularmapper_elongation_factor}/" }, mode: params.publish_dir_mode, - pattern: '*[0-9].fasta' + pattern: '*[0-9].fasta', + enabled: params.save_reference ] } @@ -557,6 +558,8 @@ process { ] } + 
// Circular mapping + // Configuration for BWA_ALN and BWA_SAMSE/SAMPE is the same as for the non-circular mapping withName: CIRCULARMAPPER_REALIGNSAMFILE { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } @@ -567,6 +570,15 @@ process { ] } + withName: ".*MAP:FASTQ_ALIGN_BWAALN_ELONGATED:SAMTOOLS_INDEX" { + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? "-c" : "" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + publishDir = [ + enabled: false + ] + } + // // DEDUPLICATION // From 1121cbf6ba6ea8169c2f5361bf14a3174d376e94 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 25 Jul 2024 13:37:34 +0200 Subject: [PATCH 31/61] no publishing of lane bams in CM --- conf/modules.config | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6b131cfbd..b912a8ba2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -564,9 +564,7 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/mapping/circularmapper/" }, - mode: params.publish_dir_mode, - pattern: '*_realigned.bam' + enabled: false ] } From cb20652f3a82e25a9ef72c298c8014cbe3392c93 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 25 Jul 2024 13:37:59 +0200 Subject: [PATCH 32/61] this works now --- subworkflows/local/circularmapper.nf | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index dde31729f..da3de830f 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -2,16 +2,17 @@ // Run circularmapper // -include { 
FASTQ_ALIGN_BWAALN as FASTQ_ALIGN_BWAALN_ELONGATED } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' -include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_REALIGNED } from '../../modules/nf-core/samtools/index/main' +include { FASTQ_ALIGN_BWAALN as FASTQ_ALIGN_BWAALN_ELONGATED } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { CIRCULARMAPPER_REALIGNSAMFILE } from '../../modules/nf-core/circularmapper/realignsamfile/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_REALIGNED } from '../../modules/nf-core/samtools/index/main' +include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' workflow CIRCULARMAPPER { take: ch_reference // channel (mandatory): [ val(meta), path(index), path(reference) ] - ch_elongated_index // channel (mandatory): [ val(meta), path(elongated_index) ] + ch_elongated_reference // channel (mandatory): [ val(meta), path(elongated_index) ] ch_fastq_reads // channel (mandatory): [ val(meta), path(reads) ]. subworkImportant: meta REQUIRES single_end` entry! - val_elongation_factor // int (mandatory): Elongation factor used for chromosome circularisation + val_elongation_factor // int (mandatory): Elongation factor used for chromosome circularisation main: ch_versions = Channel.empty() @@ -21,7 +22,7 @@ workflow CIRCULARMAPPER { ch_realigned_csis = Channel.empty() // While mapping with BWA will need the elongated reference index, RealignSAMFile apparently does NOT need the elongated reference to be present, only the elongation factor. 
- FASTQ_ALIGN_BWAALN_ELONGATED( ch_fastq_reads, ch_elongated_index ) + FASTQ_ALIGN_BWAALN_ELONGATED( ch_fastq_reads, ch_elongated_reference ) ch_versions = ch_versions.mix( FASTQ_ALIGN_BWAALN_ELONGATED.out.versions.first() ) ch_ref_for_realignsamfile = ch_reference @@ -37,7 +38,6 @@ workflow CIRCULARMAPPER { ch_input_for_realignsamfile = FASTQ_ALIGN_BWAALN_ELONGATED.out.bam .map{ // create meta consistent with rest of MAP workflow - // TODO: Check that the id_index is correctly set and remove the elongation factor suffix if necessary. meta, bam -> new_meta = meta + [ reference: meta.id_index ] [ new_meta, bam ] @@ -49,23 +49,21 @@ workflow CIRCULARMAPPER { .combine( ch_ref_for_realignsamfile, by: 0 ) .multiMap { ignore_me, meta, bam, ref_meta, ref_fasta -> - bam: [ metas, bam ] + bam: [ meta, bam ] fasta: [ ref_meta, ref_fasta ] } - CIRCULARMAPPER_REALIGNSAMFILE( ch_input_for_realignsamfile.bam, ch_input_for_realignsamfile.fasta, val_elongation_factor ) + CIRCULARMAPPER_REALIGNSAMFILE( ch_input_for_realignsamfile.bam, ch_input_for_realignsamfile.fasta, [ [], val_elongation_factor ] ) ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) - ch_realigned_bams = ch_realigned_bams.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.bam ) + ch_realigned_bams = CIRCULARMAPPER_REALIGNSAMFILE.out.bam SAMTOOLS_INDEX_REALIGNED( ch_realigned_bams ) ch_versions = ch_versions.mix( SAMTOOLS_INDEX_REALIGNED.out.versions.first() ) - ch_realigned_bais = ch_realigned_bais.mix( SAMTOOLS_INDEX_REALIGNED.out.bai ) - ch_realigned_csis = ch_realigned_csis.mix( SAMTOOLS_INDEX_REALIGNED.out.csi ) + ch_realigned_bais = params.fasta_largeref ? 
SAMTOOLS_INDEX_REALIGNED.out.csi : SAMTOOLS_INDEX_REALIGNED.out.bai emit: - bam = ch_realigned_bams // [ val(meta), path(bam) ] - bai = ch_realigned_bais // [ val(meta), path(bai) ] - csi = ch_realigned_csis // [ val(meta), path(csi) ] + bam = ch_realigned_bams // [ meta, bam ] + bai = ch_realigned_bais // [ meta, bai/csi ] versions = ch_versions mqc = ch_multiqc_files } From 224994ffa85c28c281a1e772021669fc4a9f6b45 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 25 Jul 2024 13:39:05 +0200 Subject: [PATCH 33/61] add circularmapper. fix issue with channel join before flagstat for large references --- subworkflows/local/map.nf | 62 +++++++-------------------------------- 1 file changed, 11 insertions(+), 51 deletions(-) diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 3ba52ef86..0854688f1 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -117,56 +117,16 @@ workflow MAP { ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_BT2.out.csi : SAMTOOLS_INDEX_BT2.out.bai } else if ( params.mapping_tool == 'circularmapper' ) { - ch_index_for_mapping = index - ch_elongated_reference_for_mapping = elogated_index.map{ meta, elongated_fasta, elongated_index -> [ meta, elongated_index ] } - ch_reads_for_mapping = reads + ch_elongated_reference_for_mapping = elogated_index + .map { + meta, elongated_fasta, elongated_index -> + [ meta, elongated_index ] + } - CIRCULARMAPPER( - ch_index_for_mapping, - ch_elongated_reference_for_mapping, - ch_reads_for_mapping, - params.mapping_circularmapper_elongation_factor - ) - - // // Join the original and elongated references, then combine with the reads, and multiMap to ensure correct ordering of channel contents. 
- // ch_reads_for_circularmapper = reads.map { - // // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute - // addNewMetaFromAttributes( it, "reference" , "reference" , false ) - // } - - // ch_input_for_circularmapper = index.join( elogated_index ) - // .map { - // meta, index, fasta, elongated_index, elongated_fasta, circular_target -> - // [ meta, index, fasta , elongated_index, elongated_fasta ] - // } - // .map { - // // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute - // addNewMetaFromAttributes( it, "id" , "reference" , false ) - // } - // .combine( ch_reads_for_circularmapper, by: 0) - // .multiMap { - // ignore_me, meta, index, fasta, elongated_index, elongated_fasta, circular_target, meta2, fasta, reads -> - // reads: [ meta, reads ] - // reference: [ meta, index, fasta ] - // elongated_reference: [meta, elongated_index , elongated_index] - // } - - // Reference elongation and indexing takes place in the reference_indexing swf. - // Circularmapper takes non-elongated AND elongated references and reads as input (i think. wait for Alex's reply). 
- - // ch_input_for_circularmapper = reads - // .combine(index.map{ meta, index, fasta -> [ meta, fasta ] }) - // .dump(tag:"CM Inputs", pretty:true) - // .multiMap { - // meta, reads, meta2, fasta -> - // reads: [ meta, reads ] - // reference: [ meta2, fasta ] - // } - // CIRCULARMAPPER( ch_input_for_circularmapper.reads, params.elongation_factor, ch_input_for_circularmapper.reference ) - // ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) - // // TODO - Update SWF outputs - ch_mapped_lane_bam = Channel.empty() //CIRCULARMAPPER.out.bam - ch_mapped_lane_bai = Channel.empty() // Circularmapper doesn't give a bai + CIRCULARMAPPER( index, ch_elongated_reference_for_mapping, reads, params.mapping_circularmapper_elongation_factor ) + ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) + ch_mapped_lane_bam = CIRCULARMAPPER.out.bam + ch_mapped_lane_bai = CIRCULARMAPPER.out.bai } @@ -200,7 +160,7 @@ workflow MAP { ch_mapped_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MERGED_LANES.out.csi : SAMTOOLS_INDEX_MERGED_LANES.out.bai ch_versions.mix( SAMTOOLS_INDEX_MERGED_LANES.out.versions ) - ch_input_for_flagstat = SAMTOOLS_SORT_MERGED_LANES.out.bam.join( SAMTOOLS_INDEX_MERGED_LANES.out.bai, failOnMismatch: true ) + ch_input_for_flagstat = ch_mapped_bam.join( ch_mapped_bai, failOnMismatch: true ) SAMTOOLS_FLAGSTAT_MAPPED ( ch_input_for_flagstat ) ch_versions.mix( SAMTOOLS_FLAGSTAT_MAPPED.out.versions.first() ) @@ -208,7 +168,7 @@ workflow MAP { emit: bam = ch_mapped_bam // [ [ meta ], bam ] - bai = ch_mapped_bai // [ [ meta ], bai ] + bai = ch_mapped_bai // [ [ meta ], bai/csi ] flagstat = SAMTOOLS_FLAGSTAT_MAPPED.out.flagstat // [ [ meta ], stats ] mqc = ch_multiqc_files versions = ch_versions From 63d08883a92e6d8fe844742d5b78b55675fb456d Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 25 Jul 2024 14:46:03 +0200 Subject: [PATCH 34/61] remove completed TODOs --- subworkflows/local/elongate_reference.nf | 1 - 
subworkflows/local/reference_indexing_multi.nf | 1 - subworkflows/local/reference_indexing_single.nf | 1 - 3 files changed, 3 deletions(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index d0f37cd19..9040ea23f 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -5,7 +5,6 @@ include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main' include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' -// TODO Check that the unzipping correctly overwrites the zipped fasta file, and that the emitted channel is constructed correctly. workflow ELONGATE_REFERENCE { take: diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 628d58a90..652380331 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -7,7 +7,6 @@ include { BWA_INDEX } from '../../modules/nf-core/bwa/inde include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -// TODO missing: circulargeneraotr? 
workflow REFERENCE_INDEXING_MULTI { diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index f4c9e42cd..10164f31e 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -8,7 +8,6 @@ include { BWA_INDEX } from '../../modules/nf-core/bwa/inde include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -// TODO missing: circulargeneraotr? workflow REFERENCE_INDEXING_SINGLE { From 36fc7c6386a0e5944610c78d4dd95f386670d324 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 25 Jul 2024 16:35:39 +0200 Subject: [PATCH 35/61] add CM to CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c7a9c2a5..45c6b1106 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: - "-profile test,docker --preprocessing_tool adapterremoval --preprocessing_adapterlist 'https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/adapterremoval/adapterremoval_adapterlist.txt' --sequencing_qc_tool falco --run_genotyping --genotyping_tool 'freebayes' --genotyping_source 'raw'" - "-profile test,docker --mapping_tool bwamem --run_mapdamage_rescaling --run_pmd_filtering --run_trim_bam --run_genotyping --genotyping_tool 'ug' --genotyping_source 'trimmed'" - "-profile test,docker --mapping_tool bowtie2 --damagecalculation_tool mapdamage --damagecalculation_mapdamage_downsample 100 --run_genotyping --genotyping_tool 'hc' --genotyping_source 'raw'" - - "-profile test,docker --skip_preprocessing --convert_inputbam" + - "-profile test,docker --mapping_tool circularmapper --skip_preprocessing --convert_inputbam 
--fasta_circular_target 'NC_007596.2' --mapping_circularmapper_elongation_factor 500" - "-profile test_humanbam,docker --run_mtnucratio --run_contamination_estimation_angsd --snpcapture_bed 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' --run_genotyping --genotyping_tool 'pileupcaller' --genotyping_source 'raw'" - "-profile test_humanbam,docker --run_sexdeterrmine --run_genotyping --genotyping_tool 'angsd' --genotyping_source 'raw'" - "-profile test_multiref,docker" ## TODO add damage manipulation here instead once it goes multiref From 2f78995d1f147429be836003bf7c789fa4f72e91 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 26 Jul 2024 11:14:38 +0200 Subject: [PATCH 36/61] minor tweak to avoid `null` in file names --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 529367088..11c256adf 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -433,7 +433,7 @@ process { withName: BWA_ALN { tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.index_id}" } publishDir = [ enabled: false ] @@ -446,7 +446,7 @@ process { [ "-r '@RG\\tID:ILLUMINA-${meta.sample_id}_${meta.library_id}\\tSM:${meta.sample_id}\\tLB:${meta.library_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}_stranded-${se_pe_string}'" ].join(' ').trim() } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.id_index}" } publishDir = [ enabled: false ] From 
edccda3c63de381b106920fc957703ccc26c1d5d Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 26 Jul 2024 11:43:19 +0200 Subject: [PATCH 37/61] typo --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 11c256adf..a8a40dc63 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -433,7 +433,7 @@ process { withName: BWA_ALN { tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.index_id}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.id_index}" } publishDir = [ enabled: false ] From 1979181e3830a1037b2d44414193b65c67cedb22 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 26 Jul 2024 11:58:27 +0200 Subject: [PATCH 38/61] add manual tests --- docs/development/manual_tests.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index eea6cab14..02b6c6022 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -281,6 +281,23 @@ nextflow run ../main.nf -profile test,singularity --outdir ./results -resume -du ``` +### CircularMapper + +```bash +## CircularMapper with reference elongation +## Expect: Reference elongation is run, and circularmapper SWF is run. +## Check: The elongated reference and BWA index directory within the `reference` directory. Also 2 bam files together with their BAIs and Flagstats in the `mapping/circularmapper` directory. 
+nextflow run ../main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --save_reference +``` + +```bash +## CircularMapper with an already elongated reference. Big reference flag. Also check that bwa_aln flags propagate when using circularmapper. +## Expect: Reference elongation is NOT run, and circularmapper SWF is run. +## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. +## Also check the BAM headers for the -k and -n flags during BWA ALN. +nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --mapping_circularmapper_elongated_fasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --mapping_circularmapper_elongated_fai data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 +``` + ## Host Removal All possible parameters From 585de6b42b3833fedb67ef8519a6594dcc0f7c1c Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 11:26:51 +0200 Subject: [PATCH 39/61] incorporate review suggestions. 
put mapping output within tool subdirectory --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a8a40dc63..6b30d4153 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -293,7 +293,7 @@ process { publishDir = [ path: { "${params.outdir}/reference/${meta.id}_${params.mapping_circularmapper_elongation_factor}/" }, mode: params.publish_dir_mode, - pattern: '*[0-9].fasta', + pattern: '*_*[0-9].fasta', enabled: params.save_reference ] } @@ -531,7 +531,7 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - path: { "${params.outdir}/mapping/" }, + path: { "${params.outdir}/mapping/${params.mapping_tool}" }, mode: params.publish_dir_mode, pattern: '*.{bam}' ] From ab2d40ed6a692ff0517e8c9f4d348c79481e5e14 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 11:33:51 +0200 Subject: [PATCH 40/61] update output.md --- docs/output.md | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/docs/output.md b/docs/output.md index b29959f56..ab4f0eb59 100644 --- a/docs/output.md +++ b/docs/output.md @@ -95,6 +95,25 @@ Depending on what is supplied by the user, and if `--save_reference` is supplied It is highly recommend to move these files to a central location or cache directory on your machine to facilitate resume of the indices across different pipeline runs. In many cases indexing the reference genome for alignment can be the longest step of a pipeline run, therefore re-using indices in future runs (supplied to the pipeline with flags such as `--fasta_fai`, `--fasta_dict`, etc. or added to the reference sheet provided to `--fasta`) can greatly speed up analyses on other samples. +#### Reference Elongation + +
+Output files + +- `reference/` + - `_/` + - `*.{fasta,fna,fa,fa}`: Uncompressed input FASTA file (if supplied to pipeline gzipped). + - `bwa/`: + - `*.fasta.{amb,ann,bwt,pac,sa}`: BWA aligner(s) reference index files from `bwa index`. + +
+ +Mapping with `circularmapper` requires an elongated reference built by [CircularMapper/CircularGenerator](https://github.com/apeltzer/CircularMapper). CircularGenerator elongates the `--fasta_circular_target` of a supplied reference genome fasta by the number of base pairs specified in `--fasta_circularmapper_elongationfactor`. + +Depending on what is supplied by the user, and if `--save_reference` is supplied, this directory will contain the elongated reference fasta, as well as its corresponding bwa reference index files. + +It is highly recommended to move these files to a central location or cache directory on your machine to facilitate reuse of the indices across different pipeline runs. In many cases indexing the reference genome for alignment can be the longest step of a pipeline run, therefore re-using indices in future runs (supplied to the pipeline with flags such as `--fasta_circularmapper_elongatedfasta`, `--fasta_circularmapper_elongatedindex`, etc. or added to the reference sheet provided to `--fasta`) can greatly speed up analyses on other samples. + ### Preprocessing #### Falco @@ -186,6 +205,21 @@ The resulting FASTQ files will only be present in your results directory if you [Bowtie 2](https://bowtie-bio.sourceforge.net/bowtie2/manual.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an FM Index (based on the Burrows-Wheeler Transform or BWT) to keep its memory footprint small and supports gapped, local, and paired-end alignment modes. +#### CircularMapper + +
+Output files + +- `mapping/circularmapper/` + + - `*.bam`: Sorted reads aligned against an elongated reference genome in BAM format with no additional filtering. + - `*.{bai,csi}`: Index file corresponding to a BAM file which is for faster downstream steps (e.g. SAMtools). + - `*.flagstat`: Statistics of aligned reads from SAMtools `flagstat`. + +
+ +[CircularMapper RealignSAMFile](https://github.com/apeltzer/CircularMapper/tree/master) is an extension to `bwa aln` for realigning reads mapped to circularised contigs. First, an elongated/circularised reference is built using CircularGenerator, then reads are mapped to this reference using BWA ALN. The resulting BAM file is then realigned using CircularMapper RealignSAMFile. The reference coordinates of this BAM file have been adjusted to those of the original reference genome (prior to elongation). + ### Host Removal
@@ -635,13 +669,3 @@ When using pileupCaller for genotyping, single-stranded and double-stranded libr [ANGSD](http://www.popgen.dk/angsd/index.php/ANGSD) is a software for analyzing next generation sequencing data. It can estimate genotype likelihoods and allele frequencies from next-generation sequencing data. The output provided is a bgzipped genotype likelihood file, containing likelihoods across all samples per reference. Users can specify the model used for genotype likelihood estimation, as well as the output format. For more information on the available options, see the [ANGSD](https://www.popgen.dk/angsd/index.php/Genotype_Likelihoods). -#### CircularMapper - -
-Output files - -- `mapping/circularmapper` - - - `*realigned.bam`: BAM file realigned to the extended reference - -
From c028b6a5fa491429bde05340b7d91af7ef0c70d7 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 11:35:40 +0200 Subject: [PATCH 41/61] add mapping tool subdirectory within mapping --- docs/output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index ab4f0eb59..c5a87feb1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -180,7 +180,7 @@ The resulting FASTQ files will only be present in your results directory if you
Output files -- `mapping/` +- `mapping/bwa{aln,mem}/` - `*.bam`: Sorted reads aligned against a reference genome in BAM format with no additional filtering. - `*.{bai,csi}`: Index file corresponding to a BAM file which is for faster downstream steps (e.g. SAMtools). @@ -195,7 +195,7 @@ The resulting FASTQ files will only be present in your results directory if you
Output files -- `mapping/` +- `mapping/bowtie2/` - `*.bam`: Sorted reads aligned against a reference genome in BAM format with no additional filtering. - `*.{bai,csi}`: Index file corresponding to a BAM file which is for faster downstream steps (e.g. SAMtools). From bdf0d8006fc465ac58870ad0bccb144f27715ce5 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 11:59:14 +0200 Subject: [PATCH 42/61] mapped bams/bais/csi/flagstat all in tool subdir --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6b30d4153..03a1811f9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -531,7 +531,7 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - path: { "${params.outdir}/mapping/${params.mapping_tool}" }, + path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, mode: params.publish_dir_mode, pattern: '*.{bam}' ] @@ -542,7 +542,7 @@ process { ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/mapping/" }, + path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}' ] @@ -552,7 +552,7 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/mapping/" }, + path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, mode: params.publish_dir_mode, pattern: '*.flagstat' ] From ed4d753a214933af119d3ee69a9de64ea62eb0a9 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 14:04:07 +0200 Subject: [PATCH 43/61] force bwa indexing of original ref when circularmapper is used --- subworkflows/local/reference_indexing_multi.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 652380331..313f3b59e 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -180,7 +180,7 @@ workflow REFERENCE_INDEXING_MULTI { remainder: [ meta, fasta, fai, dict ] } - if ( params.mapping_tool == "bwaaln" || params.mapping_tool == "bwamem" ) { + if ( params.mapping_tool == "bwaaln" || params.mapping_tool == "bwamem" || params.mapping_tool == "circularmapper" ) { BWA_INDEX ( ch_mapindex_input.index ) ch_version = ch_versions.mix( BWA_INDEX.out.versions ) ch_indexed_forremap = BWA_INDEX.out.index @@ -188,8 +188,6 @@ workflow REFERENCE_INDEXING_MULTI { BOWTIE2_BUILD ( ch_mapindex_input.index ) ch_version = ch_versions.mix( BOWTIE2_BUILD.out.versions ) ch_indexed_forremap = BOWTIE2_BUILD.out.index - } else if ( params.mapping_tool == "circularmapper" ) { - println("CircularMapper Indexing Not Yet Implemented") } ch_indexed_formix = ch_indexed_forremap From 
ef32d616b4832238fa759c3bbf5204e2fb6d3bbe Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 15:36:17 +0200 Subject: [PATCH 44/61] Rename Mapped Flagstat for consistency with other modules and outputs of MAP swf --- conf/modules.config | 4 ++-- subworkflows/local/map.nf | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 03a1811f9..05c999e42 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -548,9 +548,9 @@ process { ] } - withName: SAMTOOLS_FLAGSTAT_MAPPED { + withName: SAMTOOLS_FLAGSTAT_MERGED_LANES { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, mode: params.publish_dir_mode, diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 0854688f1..d66009787 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -2,17 +2,17 @@ // Prepare reference indexing for downstream // -include { SEQKIT_SPLIT2 } from '../../modules/nf-core/seqkit/split2/main' -include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' -include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' -include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2 } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from 
'../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main' -include { CIRCULARMAPPER } from '../../subworkflows/local/circularmapper' +include { SEQKIT_SPLIT2 } from '../../modules/nf-core/seqkit/split2/main' +include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2 } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MERGED_LANES } from '../../modules/nf-core/samtools/flagstat/main' +include { CIRCULARMAPPER } from '../../subworkflows/local/circularmapper' workflow MAP { take: @@ -162,14 +162,14 @@ workflow MAP { ch_input_for_flagstat = ch_mapped_bam.join( ch_mapped_bai, failOnMismatch: true ) - SAMTOOLS_FLAGSTAT_MAPPED ( ch_input_for_flagstat ) - ch_versions.mix( SAMTOOLS_FLAGSTAT_MAPPED.out.versions.first() ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT_MAPPED.out.flagstat ) + SAMTOOLS_FLAGSTAT_MERGED_LANES ( ch_input_for_flagstat ) + ch_versions.mix( SAMTOOLS_FLAGSTAT_MERGED_LANES .out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT_MERGED_LANES .out.flagstat ) emit: bam = ch_mapped_bam // [ [ meta ], bam ] bai = ch_mapped_bai // [ [ meta ], bai/csi ] - flagstat = SAMTOOLS_FLAGSTAT_MAPPED.out.flagstat // [ [ meta ], stats ] + flagstat = 
SAMTOOLS_FLAGSTAT_MERGED_LANES .out.flagstat // [ [ meta ], stats ] mqc = ch_multiqc_files versions = ch_versions From e7388e818a3ad62cccb8a9e16d5268e5e513f601 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 15:36:33 +0200 Subject: [PATCH 45/61] Rename CM parameters --- assets/schema_fasta.json | 10 ++-- conf/modules.config | 10 ++-- docs/development/manual_tests.md | 13 +++-- nextflow.config | 49 ++++++++--------- nextflow_schema.json | 54 ++++++++++--------- subworkflows/local/elongate_reference.nf | 2 +- subworkflows/local/map.nf | 2 +- .../local/reference_indexing_multi.nf | 14 ++--- .../local/reference_indexing_single.nf | 12 ++--- .../local/utils_nfcore_eager_pipeline/main.nf | 2 +- 10 files changed, 91 insertions(+), 77 deletions(-) diff --git a/assets/schema_fasta.json b/assets/schema_fasta.json index d89310422..4256a542e 100644 --- a/assets/schema_fasta.json +++ b/assets/schema_fasta.json @@ -48,19 +48,19 @@ "pattern": "^\\S+$", "errorMessage": "The headers of the chromosome to be extended by circularmapper must not contain any spaces and no leading '>'." }, - "circularmapper_elongated_fasta": { + "circularmapper_elongatedfasta": { "type": "string", "format": "file-path", "pattern": "^\\S+\\.f(na|asta|a|as)(\\.gz)?$", "exists": true, "errorMessage": "The elongated Fasta files for the mapping reference must be provided with file extensions '.fasta', '.fa', '.fas', '.fna', '.fasta.gz','.fa.gz','.fas.gz', '.fna.gz' and cannot contain any spaces." }, - "circularmapper_elongated_fai": { + "circularmapper_elongatedindex": { "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.fai$", + "format": "directory-path", + "pattern": "^\\S+$", "exists": true, - "errorMessage": "Elongated fasta index files for the mapping reference cannot have any spaces and must have file extension '.fai'." 
+ "errorMessage": "The directories of the index files for the elongated mapping reference for circularmapper must not contain any spaces and have file extensions ''." }, "mitochondrion_header": { "type": "string", diff --git a/conf/modules.config b/conf/modules.config index 05c999e42..0f9a9ed54 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -281,17 +281,17 @@ process { // Reference elongation and indexing for circular mapping withName: GUNZIP_ELONGATED_FASTA { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}_${params.mapping_circularmapper_elongation_factor}/" }, + path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, mode: params.publish_dir_mode, - pattern: '*[0-9].f*', + pattern: '*_*[0-9].f*', enabled: params.save_reference ] } withName: CIRCULARMAPPER_CIRCULARGENERATOR { - tag = { "${meta.id}_${params.mapping_circularmapper_elongation_factor}" } + tag = { "${meta.id}_${params.fasta_circularmapper_elongationfactor}" } publishDir = [ - path: { "${params.outdir}/reference/${meta.id}_${params.mapping_circularmapper_elongation_factor}/" }, + path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, mode: params.publish_dir_mode, pattern: '*_*[0-9].fasta', enabled: params.save_reference @@ -300,7 +300,7 @@ process { withName: BWA_INDEX_CIRCULARISED { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}_${params.mapping_circularmapper_elongation_factor}/" }, + path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, mode: params.publish_dir_mode, pattern: 'bwa', enabled: params.save_reference diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index 02b6c6022..50334e5b4 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -287,15 +287,22 @@ nextflow run ../main.nf -profile test,singularity --outdir ./results -resume -du ## 
CircularMapper with reference elongation ## Expect: Reference elongation is ran, and circularmapper SWF is ran. ## Check: Expect the elongated reference and BWA index directory within the `reference` directory. Also 2 bam files together with their BAIs and Flagstats in the `mapping/circularmapper` directory. -nextflow run ../main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --save_reference +nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --save_reference ``` ```bash ## CircularMapper with an already elongated reference. Big reference flag. Also check that bwa_aln flags also propagate when using circularmapper. ## Expect: Reference elongation is NOT ran, and circularmapper SWF is ran. ## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. -## Also check the BAM headers for the -k and -n flags during BWA ALN. -nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --mapping_circularmapper_elongated_fasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --mapping_circularmapper_elongated_fai data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 +## Also check the .command.sh for the -k and -n flags during BWA ALN. 
+nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --fasta_circularmapper_elongatedfasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --fasta_circularmapper_elongatedindex data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 +``` + +```bash +## Multiref with circularmapper. reference_sheet_multiref.csv edited to include elongated reference and index from first CM manual test for Mammoth_MT, and remove the human reference (save on runtime). Will still evaluate through reference_indexing_multi. +## Expect: No elongation for Mammoth MT. +## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. (6 files total) +nextflow run main.nf -profile test_multiref,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_sheet /Users/lamnidis/Software/github/jbv2/eager/data/reference/reference_sheet_multiref.csv --mapping_tool circularmapper --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 ``` ## Host Removal diff --git a/nextflow.config b/nextflow.config index 467aeba3f..0ad84d596 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,13 +29,16 @@ params { multiqc_methods_description = null // Main references - fasta = null - fasta_fai = null - fasta_dict = null - fasta_mapperindexdir = null - fasta_circular_target = null - fasta_largeref = false - fasta_sheet = null + fasta = null + fasta_fai = null + fasta_dict = null + fasta_mapperindexdir = null + fasta_circular_target = null + fasta_largeref = false + fasta_sheet = null + fasta_circularmapper_elongationfactor = 500 + fasta_circularmapper_elongatedfasta = null + fasta_circularmapper_elongatedindex = null // Shard Fastq options run_fastq_sharding = false @@ -108,23 +111,21 @@ params { preprocessing_adapterremoval_qualitymax = 41 // Mapping - 
mapping_tool = 'bwaaln' - mapping_bwaaln_n = 0.01 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwaaln_k = 2 - mapping_bwaaln_l = 1024 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwaaln_o = 2 // From Oliva et al. 2021 (10.1093/bib/bbab076) - mapping_bwamem_k = 19 - mapping_bwamem_r = 1.5 - mapping_bowtie2_alignmode = 'local' - mapping_bowtie2_sensitivity = 'sensitive' - mapping_bowtie2_n = 0 - mapping_bowtie2_l = 20 - mapping_bowtie2_trim5 = 0 - mapping_bowtie2_trim3 = 0 - mapping_bowtie2_maxins = 500 - mapping_circularmapper_elongation_factor = 500 - mapping_circularmapper_elongated_fasta = null - mapping_circularmapper_elongated_fai = null + mapping_tool = 'bwaaln' + mapping_bwaaln_n = 0.01 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwaaln_k = 2 + mapping_bwaaln_l = 1024 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwaaln_o = 2 // From Oliva et al. 2021 (10.1093/bib/bbab076) + mapping_bwamem_k = 19 + mapping_bwamem_r = 1.5 + mapping_bowtie2_alignmode = 'local' + mapping_bowtie2_sensitivity = 'sensitive' + mapping_bowtie2_n = 0 + mapping_bowtie2_l = 20 + mapping_bowtie2_trim5 = 0 + mapping_bowtie2_trim3 = 0 + mapping_bowtie2_maxins = 500 + mapping_circularmapper_circularfilter = false // BAM Filtering run_bamfiltering = false diff --git a/nextflow_schema.json b/nextflow_schema.json index f87749bd8..d2cc0d72a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,6 +84,12 @@ "help_text": "For most people this will likely be the same directory that contains the file you provided to `--fasta`.\n\nIf you want to use pre-existing `bwa index` indices, the directory should contain files ending in '.amb' '.ann' '.bwt'. If you want to use pre-existing `bowtie2 build` indices, the directory should contain files ending in'.1.bt2', '.2.bt2', '.rev.1.bt2'.\n\nIn any case do not include the files themselves in the path. 
nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa index`/`bowtie2 build` file suffixes. If not supplied, the indices will be generated for you.\n\n", "fa_icon": "fas fa-folder-open" }, + "fasta_largeref": { + "type": "boolean", + "description": "Specify to generate '.csi' BAM indices instead of '.bai' for larger reference genomes.", + "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). This parameter is not required for smaller references (including the human reference genomes hg19 or grch37/grch38).", + "fa_icon": "fas fa-address-book" + }, "save_reference": { "type": "boolean", "description": "Specify to save any pipeline-generated reference genome indices in the results directory.", @@ -129,6 +135,25 @@ "description": "Specify the FASTA header of the target chromosome to extend when using `circularmapper`.", "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`", "fa_icon": "fas fa-bullseye" + }, + "fasta_circularmapper_elongationfactor": { + "type": "integer", + "default": 500, + "description": "Specify the number of bases to extend reference by (circularmapper only).", + "help_text": "The number of bases to extend the reference genome with. 
By default this is set to 500 if not specified otherwise.", + "fa_icon": "fas fa-external-link-alt" + }, + "fasta_circularmapper_elongatedfasta": { + "type": "string", + "description": "Specify an elongated reference FASTA to be used for circularmapper.", + "help_text": "Specify an already elongated FASTA file for circularmapper to avoid reprocessing.", + "fa_icon": "fas fa-address-book" + }, + "fasta_circularmapper_elongatedindex": { + "type": "string", + "description": "Specify a samtools index for the elongated FASTA file.", + "help_text": "Specify the index for an already elongated FASTA file to avoid reprocessing.", + "fa_icon": "fas fa-address-book" } } }, @@ -506,12 +531,6 @@ "help_text": "Specify which mapping tool to use. Options are BWA aln ('`bwaaln`'), BWA mem ('`bwamem`'), circularmapper ('`circularmapper`'), or Bowtie 2 ('`bowtie2`'). BWA aln is the default and highly suited for short-read ancient DNA. BWA mem can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. CircularMapper enhances the mapping procedure to circular references, using the BWA algorithm but utilizing an extend-remap procedure (see [Peltzer et al 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). 
Bowtie 2 is similar to BWA aln, and has recently been suggested to provide slightly better results under certain conditions ([Poullet and Orlando 2020](https://doi.org/10.3389/fevo.2020.00105)), as well as providing extra functionality (such as FASTQ trimming).\n\nMore documentation can be seen for each tool under:\n\n- [BWA aln](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [BWA mem](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [CircularMapper](https://circularmapper.readthedocs.io/en/latest/contents/userguide.html)\n- [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line)", "fa_icon": "fas fa-hammer" }, - "fasta_largeref": { - "type": "boolean", - "description": "Specify to generate '.csi' BAM indices instead of '.bai' for larger reference genomes.", - "help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). This parameter is not required for smaller references (including the human reference genomes hg19 or grch37/grch38).", - "fa_icon": "fas fa-address-book" - }, "mapping_bwaaln_n": { "type": "number", "default": 0.01, @@ -605,24 +624,11 @@ "help_text": "The maximum fragment for valid paired-end alignments. Only for paired-end mapping (i.e. unmerged), and therefore typically only useful for modern data.\n\n> Modifies Bowtie2 parameter: `--maxins`", "fa_icon": "fas fa-exchange-alt" }, - "mapping_circularmapper_elongation_factor": { - "type": "integer", - "default": 500, - "description": "Specify the number of bases to extend reference by (circularmapper only).", - "help_text": "The number of bases to extend the reference genome with. 
By default this is set to 500 if not specified otherwise.", - "fa_icon": "fas fa-external-link-alt" - }, - "mapping_circularmapper_elongated_fasta": { - "type": "string", - "description": "Specify an elongated reference FASTA to be used for circularmapper.", - "help_text": "Specify an already elongated FASTA file for circularmapper to avoid reprocessing.", - "fa_icon": "fas fa-address-book" - }, - "mapping_circularmapper_elongated_fai": { - "type": "string", - "description": "Specify a samtools index for the elongated FASTA file.", - "help_text": "Specify the index for an already elongated FASTA file to avoid reprocessing.", - "fa_icon": "fas fa-address-book" + "mapping_circularmapper_circularfilter": { + "type": "boolean", + "fa_icon": "fas fa-filter", + "description": "Turn on to remove reads that did not map to the circularised genome.", + "help_text": "If you want to filter out reads that don't map to a circular chromosome (and also non-circular chromosome headers) from the resulting BAM file, turn this on.\n\n> Modifies `-f` and `-x` parameters of CircularMapper's RealignSAMFile" } }, "fa_icon": "fas fa-layer-group" diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 9040ea23f..5c07a3e25 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -67,7 +67,7 @@ workflow ELONGATE_REFERENCE { .multiMap { meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index, fasta, fai, dict, mapindex -> - def elongation_factor = params.mapping_circularmapper_elongation_factor + def elongation_factor = params.fasta_circularmapper_elongationfactor fasta: [ meta, fasta ] elongation_factor : [ meta, elongation_factor ] diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index d66009787..55b222372 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -123,7 +123,7 @@ workflow MAP { [ meta, elongated_index ] } - 
CIRCULARMAPPER( index, ch_elongated_reference_for_mapping, reads, params.mapping_circularmapper_elongation_factor ) + CIRCULARMAPPER( index, ch_elongated_reference_for_mapping, reads, params.fasta_circularmapper_elongationfactor ) ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) ch_mapped_lane_bam = CIRCULARMAPPER.out.bam ch_mapped_lane_bai = CIRCULARMAPPER.out.bai diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 313f3b59e..00276a5b9 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -19,14 +19,14 @@ workflow REFERENCE_INDEXING_MULTI { // Import reference sheet and change empty arrays to empty strings for compatibility with single reference input ch_splitreferencesheet_for_branch = Channel.fromSamplesheet("fasta_sheet") .map{ - meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> + meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> meta.ploidy = meta.genotyping_ploidy != null ? meta.genotyping_ploidy : params.genotyping_reference_ploidy fai = fai != [] ? fai : "" dict = dict != [] ? dict : "" mapper_index = mapper_index != [] ? mapper_index : "" circular_target = circular_target != [] ? circular_target : "" - circularmapper_elongated_fasta = circularmapper_elongated_fasta != [] ? circularmapper_elongated_fasta : "" - circularmapper_elongated_fai = circularmapper_elongated_fai != [] ? 
circularmapper_elongated_fai : "" + circularmapper_elongatedfasta = circularmapper_elongatedfasta != [] ? circularmapper_elongatedfasta : "" + circularmapper_elongatedindex = circularmapper_elongatedindex != [] ? circularmapper_elongatedindex : "" mitochondrion = mitochondrion != [] ? mitochondrion : "" capture_bed = capture_bed != [] ? capture_bed : "" pileupcaller_bed = pileupcaller_bed != [] ? pileupcaller_bed : "" @@ -37,7 +37,7 @@ workflow REFERENCE_INDEXING_MULTI { sexdet_bed = sexdet_bed != [] ? sexdet_bed : "" bedtools_feature = bedtools_feature != [] ? bedtools_feature : "" genotyping_gatk_dbsnp = genotyping_gatk_dbsnp != [] ? genotyping_gatk_dbsnp : "" - [ meta - meta.subMap('genotyping_ploidy'), fasta, fai, dict, mapper_index, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] + [ meta - meta.subMap('genotyping_ploidy'), fasta, fai, dict, mapper_index, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp ] } // GENERAL DESCRIPTION FOR NEXT SECTIONS @@ -53,9 +53,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_input_from_referencesheet = ch_splitreferencesheet_for_branch .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> + meta, fasta, fai, dict, mapper_index, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_masked_fasta, 
pmd_bed_for_masking, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp -> generated: [ meta, fasta, fai, dict, mapper_index ] - circularmapper: [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + circularmapper: [ meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex ] mitochondrion_header: [ meta, mitochondrion ] angsd_hapmap: [ meta, hapmap ] pmd_masked_fasta: [ meta, pmd_masked_fasta ] @@ -202,7 +202,7 @@ workflow REFERENCE_INDEXING_MULTI { emit: reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex ] - elongated_reference = ch_input_from_referencesheet.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elongated_reference = ch_input_from_referencesheet.circularmapper // [ meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex ] mitochondrion_header = ch_input_from_referencesheet.mitochondrion_header // [ meta, mitochondrion ] hapmap = ch_input_from_referencesheet.angsd_hapmap // [ meta, hapmap ] pmd_masked_fasta = ch_input_from_referencesheet.pmd_masked_fasta // [ meta, pmd_masked_fasta ] diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index 10164f31e..41feced1e 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -89,16 +89,16 @@ workflow REFERENCE_INDEXING_SINGLE { def bedtools_feature = params.mapstats_bedtools_featurefile != null ? file(params.mapstats_bedtools_featurefile, checkIfExists: true ) : "" def genotyping_reference_ploidy = params.genotyping_reference_ploidy def genotyping_gatk_dbsnp = params.genotyping_gatk_dbsnp != null ? file(params.genotyping_gatk_dbsnp, checkIfExists: true ) : "" - def circularmapper_elongated_fasta = params.mapping_circularmapper_elongated_fasta != null ? 
file( params.mapping_circularmapper_elongated_fasta, checkIfExists: true ) : "" - def circularmapper_elongated_fai = params.mapping_circularmapper_elongated_fai != null ? file( params.mapping_circularmapper_elongated_fai, checkIfExists: true ) : "" - [ meta + [ ploidy: genotyping_reference_ploidy ], fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + def circularmapper_elongated_fasta = params.fasta_circularmapper_elongatedfasta != null ? file( params.fasta_circularmapper_elongatedfasta, checkIfExists: true ) : "" + def circularmapper_elongated_index = params.fasta_circularmapper_elongatedindex != null ? file( params.fasta_circularmapper_elongatedindex, checkIfExists: true ) : "" + [ meta + [ ploidy: genotyping_reference_ploidy ], fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_index ] } ch_ref_index_single = ch_reference_for_mapping .multiMap{ - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_fai -> + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_masked_fasta, pmd_bed_for_masking, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature, 
genotyping_gatk_dbsnp, circularmapper_elongated_fasta, circularmapper_elongated_index -> reference: [ meta, fasta, fai, dict, mapper_index ] - circularmapper: [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + circularmapper: [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index ] mito_header: [ meta, mitochondrion_header ] hapmap: [ meta, contamination_estimation_angsd_hapmap ] pmd_masked_fasta: [ meta, pmd_masked_fasta ] @@ -112,7 +112,7 @@ workflow REFERENCE_INDEXING_SINGLE { emit: reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex ] - elongated_reference = ch_ref_index_single.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_fai ] + elongated_reference = ch_ref_index_single.circularmapper // [ meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index ] mitochondrion_header = ch_ref_index_single.mito_header // [ meta, mito_header ] hapmap = ch_ref_index_single.hapmap // [ meta, hapmap ] pmd_masked_fasta = ch_ref_index_single.pmd_masked_fasta // [ meta, pmd_masked_fasta ] diff --git a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf index a7d75dd28..ba460c49e 100644 --- a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf @@ -231,7 +231,7 @@ def validateInputParameters() { if ( params.genotyping_source == 'pmd' && ! params.run_pmd_filtering ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'pmd' unless PMD-filtering is ran.") } if ( params.genotyping_source == 'rescaled' && ! params.run_mapdamage_rescaling ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'rescaled' unless aDNA damage rescaling is ran.") } if ( params.fasta && params.run_genotyping && params.genotyping_tool == 'pileupcaller' && ! 
(params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile ) ) { exit 1, ("[nf-core/eager] ERROR: Genotyping with pileupcaller requires both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' to be provided.") } - if ( params.fasta && params.mapping_tool == "circularmapper" && ! params.fasta_circular_target && ! params.mapping_circularmapper_elongated_fasta ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires either --fasta_circular_target or --mapping_circularmapper_elongated_fasta. ") } + if ( params.fasta && params.mapping_tool == "circularmapper" && ! params.fasta_circular_target && ! params.fasta_circularmapper_elongatedfasta ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires either --fasta_circular_target or --fasta_circularmapper_elongatedfasta. ") } } // From 3b3d1d0c105ba20404d5d2e85a87809d42703ed5 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 2 Aug 2024 15:41:03 +0200 Subject: [PATCH 46/61] Add suggestions from review --- nextflow_schema.json | 6 +++--- subworkflows/local/circularmapper.nf | 2 +- subworkflows/local/reference_indexing.nf | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index d2cc0d72a..e64216c1c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -140,19 +140,19 @@ "type": "integer", "default": 500, "description": "Specify the number of bases to extend reference by (circularmapper only).", - "help_text": "The number of bases to extend the reference genome with. 
By default this is set to 500 if not specified otherwise.", + "help_text": "The number of bases to extend the beginning and end of each reference genome with.", "fa_icon": "fas fa-external-link-alt" }, "fasta_circularmapper_elongatedfasta": { "type": "string", "description": "Specify an elongated reference FASTA to be used for circularmapper.", - "help_text": "Specify an already elongated FASTA file for circularmapper to avoid reprocessing.", + "help_text": "Specify an already elongated FASTA file for circularmapper to avoid regeneration.", "fa_icon": "fas fa-address-book" }, "fasta_circularmapper_elongatedindex": { "type": "string", "description": "Specify a samtools index for the elongated FASTA file.", - "help_text": "Specify the index for an already elongated FASTA file to avoid reprocessing.", + "help_text": "Specify the index for an already elongated FASTA file to avoid regeneration.", "fa_icon": "fas fa-address-book" } } diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index da3de830f..45726ed0e 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -21,7 +21,7 @@ workflow CIRCULARMAPPER { ch_realigned_bais = Channel.empty() ch_realigned_csis = Channel.empty() - // While mapping with BWA will need the elongated reference index, RealignSAMFile apparently does NOT need the elongated reference to be present, only the elongation factor. + // Although mapping with BWA will need the elongated reference index, RealignSAMFile apparently does NOT need the elongated reference to be present, only the elongation factor. 
FASTQ_ALIGN_BWAALN_ELONGATED( ch_fastq_reads, ch_elongated_reference ) ch_versions = ch_versions.mix( FASTQ_ALIGN_BWAALN_ELONGATED.out.versions.first() ) diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 2fa931938..7f1d54991 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -133,7 +133,7 @@ workflow REFERENCE_INDEXING { // Throw errors if required parameters are missing ch_elongated_for_gunzip = ch_elongated_reference .filter{ it[1] != "" || it[2] != "" } - .ifEmpty{ error "[nf-core/eager]: ERROR: Mapping with circularmapper requires either a circular target or elongated reference file for at least one reference." } + .ifEmpty{ error "[nf-core/eager] ERROR: Mapping with circularmapper requires either a circular target or elongated reference file for at least one reference." } // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. ELONGATE_REFERENCE( ch_reference_for_mapping, ch_elongated_reference ) From d03b7c7a76a926b3b4936c003547862bb73a4516 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 5 Aug 2024 14:00:18 +0200 Subject: [PATCH 47/61] linting --- docs/output.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index c5a87feb1..e22b29149 100644 --- a/docs/output.md +++ b/docs/output.md @@ -668,4 +668,3 @@ When using pileupCaller for genotyping, single-stranded and double-stranded libr
[ANGSD](http://www.popgen.dk/angsd/index.php/ANGSD) is a software for analyzing next generation sequencing data. It can estimate genotype likelihoods and allele frequencies from next-generation sequencing data. The output provided is a bgzipped genotype likelihood file, containing likelihoods across all samples per reference. Users can specify the model used for genotype likelihood estimation, as well as the output format. For more information on the available options, see the [ANGSD](https://www.popgen.dk/angsd/index.php/Genotype_Likelihoods). - From f72531bff404caa632edaabfaf3d33b8f473c36d Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 5 Aug 2024 15:45:44 +0200 Subject: [PATCH 48/61] rename elongated ref channel for clarity with its purpose --- subworkflows/local/reference_indexing.nf | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 7f1d54991..5ac5e8866 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -27,7 +27,7 @@ workflow REFERENCE_INDEXING { // If input (multi-)reference sheet supplied REFERENCE_INDEXING_MULTI ( fasta ) ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference - ch_elongated_reference = REFERENCE_INDEXING_MULTI.out.elongated_reference + ch_reference_to_elongate = REFERENCE_INDEXING_MULTI.out.elongated_reference ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_MULTI.out.pmd_masked_fasta @@ -41,7 +41,7 @@ workflow REFERENCE_INDEXING { } else { // If input FASTA and/or indicies supplied REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) - ch_elongated_reference = REFERENCE_INDEXING_SINGLE.out.elongated_reference + ch_reference_to_elongate = REFERENCE_INDEXING_SINGLE.out.elongated_reference 
ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_SINGLE.out.pmd_masked_fasta @@ -131,16 +131,19 @@ workflow REFERENCE_INDEXING { // Elongate reference for circularmapper if requested if ( params.mapping_tool == "circularmapper" ) { // Throw errors if required parameters are missing - ch_elongated_for_gunzip = ch_elongated_reference - .filter{ it[1] != "" || it[2] != "" } + ch_elongated_for_gunzip = ch_reference_to_elongate + .filter{ + meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex -> + circular_target != "" || circularmapper_elongatedfasta != "" + } .ifEmpty{ error "[nf-core/eager] ERROR: Mapping with circularmapper requires either a circular target or elongated reference file for at least one reference." } // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. - ELONGATE_REFERENCE( ch_reference_for_mapping, ch_elongated_reference ) + ELONGATE_REFERENCE( ch_reference_for_mapping, ch_reference_to_elongate ) ch_version = ch_versions.mix( ELONGATE_REFERENCE.out.versions ) ch_elongated_indexed_reference = ELONGATE_REFERENCE.out.circular_reference } else { - ch_elongated_indexed_reference = ch_elongated_reference + ch_elongated_indexed_reference = ch_reference_to_elongate } emit: From 8fe564bd314d92b0f3d1dd831385f0f2771ffb3b Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 5 Aug 2024 15:45:58 +0200 Subject: [PATCH 49/61] add mapping_circularmapper_circularfilter option --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index 0f9a9ed54..d944ed993 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -562,6 +562,7 @@ process { // Configuration for BWA_ALN and BWA_SAMSE/SAMPE is the same as for the non-circular mapping withName: CIRCULARMAPPER_REALIGNSAMFILE { tag = { 
"${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.mapping_circularmapper_circularfilter ? "-f true -x true" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ enabled: false From 2a98bd9de7e01b80564154e02cb5f2eb174c7ada Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 5 Aug 2024 15:49:32 +0200 Subject: [PATCH 50/61] update manual tests --- docs/development/manual_tests.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index 50334e5b4..7d9f9a8fe 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -305,6 +305,14 @@ nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -d nextflow run main.nf -profile test_multiref,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_sheet /Users/lamnidis/Software/github/jbv2/eager/data/reference/reference_sheet_multiref.csv --mapping_tool circularmapper --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 ``` +```bash +## Multiref with circularmapper PLUS filtering. +## Expect: No elongation for Mammoth MT. +## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. 
(6 files total) +nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --fasta_circularmapper_elongatedfasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --fasta_circularmapper_elongatedindex data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 --mapping_circularmapper_circularfilter +``` + + ## Host Removal All possible parameters From 6a076c2c0906cacc5e071b99bb61506597632324 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 9 Aug 2024 10:29:15 +0200 Subject: [PATCH 51/61] update CM modules --- modules.json | 4 ++-- .../circularmapper/circulargenerator/main.nf | 12 ++++++++---- .../circularmapper/circulargenerator/meta.yml | 15 ++++----------- .../nf-core/circularmapper/realignsamfile/main.nf | 3 +++ .../circularmapper/realignsamfile/meta.yml | 9 +++++++++ 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/modules.json b/modules.json index ed96492a2..a732a3cf9 100644 --- a/modules.json +++ b/modules.json @@ -97,12 +97,12 @@ }, "circularmapper/circulargenerator": { "branch": "master", - "git_sha": "0148d00e72e35cd08b3d829d7de3430bc0c92a5a", + "git_sha": "a7b0131370d9bc38076efad88773bca5537203d0", "installed_by": ["modules"] }, "circularmapper/realignsamfile": { "branch": "master", - "git_sha": "579d2d5f15e126a2190a7b709dfc77696c83688d", + "git_sha": "a7b0131370d9bc38076efad88773bca5537203d0", "installed_by": ["modules"] }, "damageprofiler": { diff --git a/modules/nf-core/circularmapper/circulargenerator/main.nf b/modules/nf-core/circularmapper/circulargenerator/main.nf index 07b722c39..9463ec497 100644 --- a/modules/nf-core/circularmapper/circulargenerator/main.nf +++ b/modules/nf-core/circularmapper/circulargenerator/main.nf @@ -16,8 +16,9 @@ process CIRCULARMAPPER_CIRCULARGENERATOR { tuple val(meta3), val(target) output: - tuple val(meta), 
path("*_${elongation_factor}.fasta"), emit: fasta - path "versions.yml" , emit: versions + tuple val(meta), path("*_${elongation_factor}.fasta") , emit: fasta + tuple val(meta), path("*${elongation_factor}_elongated") , emit: elongated + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,6 +26,7 @@ process CIRCULARMAPPER_CIRCULARGENERATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def full_extension = reference.getName().replaceFirst(reference.getSimpleName(), "") """ circulargenerator \ -e ${elongation_factor} \ @@ -33,8 +35,9 @@ process CIRCULARMAPPER_CIRCULARGENERATOR { $args ## circulargenerator has a hardcoded output name. Rename if necessary to use prefix. - if [[ "${reference.getBaseName()}_${elongation_factor}.fasta" != "${prefix}_${elongation_factor}.fasta" ]]; then - mv ${reference.getBaseName()}_${elongation_factor}.fasta ${prefix}_${elongation_factor}.fasta + if [[ "${reference.getSimpleName()}_${elongation_factor}${full_extension}" != "${prefix}_${elongation_factor}.fasta" ]]; then + mv ${reference.getSimpleName()}_${elongation_factor}${full_extension} ${prefix}_${elongation_factor}.fasta + mv ${reference}_${elongation_factor}_elongated ${prefix}.fasta_${elongation_factor}_elongated fi cat <<-END_VERSIONS > versions.yml @@ -48,6 +51,7 @@ process CIRCULARMAPPER_CIRCULARGENERATOR { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_${elongation_factor}.fasta + touch ${prefix}.fasta_${elongation_factor}_elongated cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/circularmapper/circulargenerator/meta.yml b/modules/nf-core/circularmapper/circulargenerator/meta.yml index 2704fbc36..baa39e74b 100644 --- a/modules/nf-core/circularmapper/circulargenerator/meta.yml +++ b/modules/nf-core/circularmapper/circulargenerator/meta.yml @@ -14,57 +14,50 @@ tools: tool_dev_url: "https://github.com/apeltzer/CircularMapper" doi: "no DOI 
available" licence: ["GPL v3"] - input: - meta: type: map description: | Groovy Map containing reference information e.g. `[ id:'sample1' ]` - - reference: type: file description: Genome fasta file pattern: "*.fasta" - - meta2: type: map description: | Groovy Map containing reference information e.g. `[ id:'sample1' ]` - - elongation_factor: type: integer description: The number of bases that the ends of the target chromosome in the reference genome should be elongated by - - meta3: type: map description: | Groovy Map containing reference information e.g. `[ id:'sample1' ]` - - target: type: string description: The name of the chromosome in the reference genome that should be elongated - output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - versions: type: file description: File containing software versions pattern: "versions.yml" - - fasta: type: file description: Genome fasta file pattern: "*.fasta" - + - elongated: + type: file + description: File listing the chromosomes that were elongated + pattern: "*_elongated" authors: - "@apalleja" - "@TCLamnidis" diff --git a/modules/nf-core/circularmapper/realignsamfile/main.nf b/modules/nf-core/circularmapper/realignsamfile/main.nf index 9d74f7b91..6363b8d25 100644 --- a/modules/nf-core/circularmapper/realignsamfile/main.nf +++ b/modules/nf-core/circularmapper/realignsamfile/main.nf @@ -11,6 +11,9 @@ process CIRCULARMAPPER_REALIGNSAMFILE { tuple val(meta), path(bam) tuple val(meta2), path(fasta) tuple val(meta3), val(elongation_factor) + tuple val(meta4), path(elongated_chr_list) + // NOTE: The elongated_chr_list is not used in the script, but is an implicit input that realignsamfile requires when using the `-f true` option. + // In its absence, when `-f true` is set, realignsamfile will remove all @SQ tags from the BAM header, breaking the bamfile. 
output: tuple val(meta), path("*_realigned.bam") , emit: bam diff --git a/modules/nf-core/circularmapper/realignsamfile/meta.yml b/modules/nf-core/circularmapper/realignsamfile/meta.yml index fbb62d76d..94f74069e 100644 --- a/modules/nf-core/circularmapper/realignsamfile/meta.yml +++ b/modules/nf-core/circularmapper/realignsamfile/meta.yml @@ -43,6 +43,15 @@ input: - elongation_factor: type: integer description: The elongation factor used when running circulargenerator, i.e. the number of bases that the ends of the target chromosome in the reference genome was elongated by + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - elongated_chr_list: + type: file + description: File listing the chromosomes that were elongated + pattern: "*_elongated" output: - meta: type: map From 81d79f8411a6281703171b8b99d6f2f1acaebf76 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 9 Aug 2024 10:57:18 +0200 Subject: [PATCH 52/61] add elongated chromosome list needed for circularmapper filtering --- subworkflows/local/circularmapper.nf | 15 +++++++++------ subworkflows/local/elongate_reference.nf | 5 ++++- subworkflows/local/map.nf | 9 +++++---- subworkflows/local/reference_indexing.nf | 4 ++++ workflows/eager.nf | 2 +- 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/circularmapper.nf b/subworkflows/local/circularmapper.nf index 45726ed0e..46874f987 100644 --- a/subworkflows/local/circularmapper.nf +++ b/subworkflows/local/circularmapper.nf @@ -11,6 +11,7 @@ workflow CIRCULARMAPPER { take: ch_reference // channel (mandatory): [ val(meta), path(index), path(reference) ] ch_elongated_reference // channel (mandatory): [ val(meta), path(elongated_index) ] + ch_elongated_chr_list // channel (mandatory): [ val(meta), path(elongated_chr_list) ] ch_fastq_reads // channel (mandatory): [ val(meta), path(reads) ]. subworkImportant: meta REQUIRES single_end` entry! 
val_elongation_factor // int (mandatory): Elongation factor used for chromosome circularisation @@ -26,9 +27,10 @@ workflow CIRCULARMAPPER { ch_versions = ch_versions.mix( FASTQ_ALIGN_BWAALN_ELONGATED.out.versions.first() ) ch_ref_for_realignsamfile = ch_reference + .join( ch_elongated_chr_list ) .map { - meta, index, reference -> - [ meta, reference ] + meta, index, reference, elongated_chr_list -> + [ meta, reference, elongated_chr_list ] } .map { // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute @@ -48,12 +50,13 @@ workflow CIRCULARMAPPER { } .combine( ch_ref_for_realignsamfile, by: 0 ) .multiMap { - ignore_me, meta, bam, ref_meta, ref_fasta -> - bam: [ meta, bam ] - fasta: [ ref_meta, ref_fasta ] + ignore_me, meta, bam, ref_meta, ref_fasta, elongated_chr_list -> + bam: [ meta, bam ] + fasta: [ ref_meta, ref_fasta ] + chr_list: [ ref_meta, elongated_chr_list ] } - CIRCULARMAPPER_REALIGNSAMFILE( ch_input_for_realignsamfile.bam, ch_input_for_realignsamfile.fasta, [ [], val_elongation_factor ] ) + CIRCULARMAPPER_REALIGNSAMFILE( ch_input_for_realignsamfile.bam, ch_input_for_realignsamfile.fasta, [ [], val_elongation_factor ], ch_input_for_realignsamfile.chr_list ) ch_versions = ch_versions.mix( CIRCULARMAPPER_REALIGNSAMFILE.out.versions.first() ) ch_realigned_bams = CIRCULARMAPPER_REALIGNSAMFILE.out.bam diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 5c07a3e25..42d3b78e7 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -16,6 +16,7 @@ workflow ELONGATE_REFERENCE { ch_multiqc_files = Channel.empty() ch_circular_reference = Channel.empty() ch_elongated_unzipped = Channel.empty() + ch_elongated_chr = Channel.empty() // Check if the provided elongated reference is gzipped, and if so, unzip it. 
ch_elongated_branches = ch_elongated_reference @@ -79,7 +80,8 @@ workflow ELONGATE_REFERENCE { ch_references_to_elongate.elongation_factor, ch_references_to_elongate.target ) - ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) + ch_elongated_chr = CIRCULARMAPPER_CIRCULARGENERATOR.out.elongated + ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) // Collect newly generated circular references and provided ones without an index, and index them. ch_input_for_circular_indexing = ch_circulargenerator_input.needs_index @@ -105,6 +107,7 @@ workflow ELONGATE_REFERENCE { emit: circular_reference = ch_circular_reference // [ meta, fasta, index ] + elongated_chr_list = ch_elongated_chr // [ meta, elongated_chr_list ] versions = ch_versions mqc = ch_multiqc_files } diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 55b222372..43469c9e2 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -16,9 +16,10 @@ include { CIRCULARMAPPER } from '../../subw workflow MAP { take: - reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] - index // [ [meta], [ index ], [ fasta ] ] - elogated_index // [ [meta], circularmapper_elongated_fasta, circularmapper_elongated_index ] + reads // [ [meta], [read1, reads2] ] or [ [meta], [read1] ] + index // [ [meta], [ index ], [ fasta ] ] + elogated_index // [ [meta], circularmapper_elongated_fasta, circularmapper_elongated_index ] + elongated_chr_list // [ [meta], elongated_chr_list ] main: ch_versions = Channel.empty() @@ -123,7 +124,7 @@ workflow MAP { [ meta, elongated_index ] } - CIRCULARMAPPER( index, ch_elongated_reference_for_mapping, reads, params.fasta_circularmapper_elongationfactor ) + CIRCULARMAPPER( index, ch_elongated_reference_for_mapping, elongated_chr_list, reads, params.fasta_circularmapper_elongationfactor ) ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) ch_mapped_lane_bam = CIRCULARMAPPER.out.bam 
ch_mapped_lane_bai = CIRCULARMAPPER.out.bai diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 5ac5e8866..e085e0f8d 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -142,13 +142,17 @@ workflow REFERENCE_INDEXING { ELONGATE_REFERENCE( ch_reference_for_mapping, ch_reference_to_elongate ) ch_version = ch_versions.mix( ELONGATE_REFERENCE.out.versions ) ch_elongated_indexed_reference = ELONGATE_REFERENCE.out.circular_reference + ch_elongated_chr_list = ELONGATE_REFERENCE.out.elongated_chr_list + } else { ch_elongated_indexed_reference = ch_reference_to_elongate + ch_elongated_chr_list = Channel.empty() } emit: reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] elongated_reference = ch_elongated_indexed_reference // [ meta, circularmapper_elongated_fasta, circularmapper_elongated_index ] + elongated_chr_list = ch_elongated_chr_list // [ meta, elongated_chr_list ] mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] hapmap = ch_hapmap // [ meta, hapmap ] pmd_masking = ch_pmd_masking // [ meta, pmd_masked_fasta, pmd_bed_for_masking ] diff --git a/workflows/eager.nf b/workflows/eager.nf index 31bbee84f..2804db5fd 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -185,7 +185,7 @@ workflow EAGER { [ meta, index, fasta ] } - MAP ( ch_reads_for_mapping, ch_reference_for_mapping, REFERENCE_INDEXING.out.elongated_reference ) + MAP ( ch_reads_for_mapping, ch_reference_for_mapping, REFERENCE_INDEXING.out.elongated_reference, REFERENCE_INDEXING.out.elongated_chr_list ) ch_versions = ch_versions.mix( MAP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( MAP.out.mqc.collect{it[1]}.ifEmpty([]) ) From f901063ebc4219f81c60870b8d6356b70973285b Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 9 Aug 2024 11:49:09 +0200 Subject: [PATCH 53/61] start adding manual chrom_list creation --- 
docs/development/manual_tests.md | 4 ++-- subworkflows/local/elongate_reference.nf | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index 7d9f9a8fe..b83ee686f 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -306,12 +306,12 @@ nextflow run main.nf -profile test_multiref,docker --outdir ./results -w work/ - ``` ```bash -## Multiref with circularmapper PLUS filtering. +## Circularmapper with circularfilter. ## Expect: No elongation for Mammoth MT. ## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. (6 files total) nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --fasta_circularmapper_elongatedfasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --fasta_circularmapper_elongatedindex data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 --mapping_circularmapper_circularfilter ``` - + ## Host Removal diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 42d3b78e7..70b868cff 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -5,6 +5,7 @@ include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main' include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' +// include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' workflow ELONGATE_REFERENCE { take: @@ -61,6 +62,28 @@ workflow ELONGATE_REFERENCE { needs_elongation: circularmapper_elongated_fasta == "" && 
circular_target != "" } + // References that are already elongated, need ch_elongated_chr to be created from the circular target information + ch_needs_elongated_chr_list = ch_circulargenerator_input.ready + .mix( ch_circulargenerator_input.needs_index ) + .join( ch_reference ) + .map { + meta, circular_target, circularmapper_elongated_fasta, circularmapper_elongated_index, fasta, fai, dict, mapindex -> + [ meta, fasta, circular_target ] + } + .collectFile { + meta, fasta, circular_target -> + [ "${fasta.name}_500_elongated", circular_target + '\n' ] + } + /* The above gets the right information into the created files, but the channel then also needs a meta, which collectFile doesn't seem able to handle. + TODO Proposed solution: + - Use a map to infer the meta.id fromt he file name (i.e. file name without the suffix. since everything by now is unzipped, it should work). + - Then pull that info out of the ch_reference meta with addNewMetaFromAttributes. + - Join the channels by this meta + - Use a map to give the collected file the meta of the reference. + + This is a bit convoluted, but it should work. Would be simpler if I could create the meta within collectFile. + */ + // Elongate references that need it // Join the original references to the branch of needs_elongation, to get the original fasta files, and elongate them. 
ch_references_to_elongate = ch_circulargenerator_input.needs_elongation From 0b0bcf1bd02d365b9c97d452bdf6d30a3dfe3874 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 12 Aug 2024 11:37:23 +0200 Subject: [PATCH 54/61] create elongated_chr_list from circular target --- subworkflows/local/elongate_reference.nf | 40 ++++++++++++++++-------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index 70b868cff..ab8c8972b 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -5,7 +5,7 @@ include { GUNZIP as GUNZIP_ELONGATED_FASTA } from '../../modules/nf-core/gunzip/main' include { CIRCULARMAPPER_CIRCULARGENERATOR } from '../../modules/nf-core/circularmapper/circulargenerator/main' include { BWA_INDEX as BWA_INDEX_CIRCULARISED } from '../../modules/nf-core/bwa/index/main' -// include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' +include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' workflow ELONGATE_REFERENCE { take: @@ -62,8 +62,20 @@ workflow ELONGATE_REFERENCE { needs_elongation: circularmapper_elongated_fasta == "" && circular_target != "" } - // References that are already elongated, need ch_elongated_chr to be created from the circular target information - ch_needs_elongated_chr_list = ch_circulargenerator_input.ready + /* References that are already elongated, need ch_elongated_chr to be created from the circular target information + 1) Get the reference information ready for joinin with the new channel. + 2) Take all subchannels from the multiMap that do not go through CircularGenerator (.ready,.needs_index) and infer the name of the elongated_chr_list expected by RealignSAMFile + 3) Put the circular target in a file of that name FOR EACH REFERENCE. The resulting channel has no meta, so we need to add it. 
+ 4) Add meta, and use to merge back to the reference channel. This way we can take the original reference's meta. + + This is a bit convoluted, but it should work. Would be simpler if I could create the meta within collectFile, but I did not find a way to do that. + */ + ch_ref_for_chr_list = ch_reference + .map { + addNewMetaFromAttributes( it, "id", "id", false ) + } + + ch_chr_list_for_already_elongated_ref = ch_circulargenerator_input.ready .mix( ch_circulargenerator_input.needs_index ) .join( ch_reference ) .map { @@ -74,15 +86,17 @@ workflow ELONGATE_REFERENCE { meta, fasta, circular_target -> [ "${fasta.name}_500_elongated", circular_target + '\n' ] } - /* The above gets the right information into the created files, but the channel then also needs a meta, which collectFile doesn't seem able to handle. - TODO Proposed solution: - - Use a map to infer the meta.id fromt he file name (i.e. file name without the suffix. since everything by now is unzipped, it should work). - - Then pull that info out of the ch_reference meta with addNewMetaFromAttributes. - - Join the channels by this meta - - Use a map to give the collected file the meta of the reference. - - This is a bit convoluted, but it should work. Would be simpler if I could create the meta within collectFile. - */ + .map { + file -> + def id = file.getSimpleName() + [ [id: id ], file ] + } + .join(ch_ref_for_chr_list) + .map { + ignore_me, chr_list, meta, fasta, fai, dict, mapindex -> + [ meta, chr_list ] + } + .dump(tag: "collected_files", pretty:true) // Elongate references that need it // Join the original references to the branch of needs_elongation, to get the original fasta files, and elongate them. 
@@ -103,7 +117,7 @@ workflow ELONGATE_REFERENCE { ch_references_to_elongate.elongation_factor, ch_references_to_elongate.target ) - ch_elongated_chr = CIRCULARMAPPER_CIRCULARGENERATOR.out.elongated + ch_elongated_chr = ch_chr_list_for_already_elongated_ref.mix(CIRCULARMAPPER_CIRCULARGENERATOR.out.elongated) ch_versions = ch_versions.mix( CIRCULARMAPPER_CIRCULARGENERATOR.out.versions.first() ) // Collect newly generated circular references and provided ones without an index, and index them. From 7063fd2586624d5367e19f3d1e0a70936c292d8a Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 12 Aug 2024 11:48:14 +0200 Subject: [PATCH 55/61] Circular target required regardless of elongated fasta --- assets/schema_fasta.json | 2 +- nextflow_schema.json | 2 +- subworkflows/local/reference_indexing.nf | 5 +++-- subworkflows/local/utils_nfcore_eager_pipeline/main.nf | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/assets/schema_fasta.json b/assets/schema_fasta.json index 4256a542e..54e00de8e 100644 --- a/assets/schema_fasta.json +++ b/assets/schema_fasta.json @@ -46,7 +46,7 @@ "circular_target": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "The headers of the chromosome to be extended by circularmapper must not contain any spaces and no leading '>'." + "errorMessage": "The headers of the chromosome extended by circulargenerator must not contain any spaces and no leading '>'." }, "circularmapper_elongatedfasta": { "type": "string", diff --git a/nextflow_schema.json b/nextflow_schema.json index e64216c1c..67b97576f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -132,7 +132,7 @@ }, "fasta_circular_target": { "type": "string", - "description": "Specify the FASTA header of the target chromosome to extend when using `circularmapper`.", + "description": "Specify the FASTA header of the extended chromosome when using `circularmapper`.", "help_text": "The entry (chromosome, contig, etc.) 
in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`", "fa_icon": "fas fa-bullseye" }, diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index e085e0f8d..29e768ba8 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -131,12 +131,13 @@ workflow REFERENCE_INDEXING { // Elongate reference for circularmapper if requested if ( params.mapping_tool == "circularmapper" ) { // Throw errors if required parameters are missing + // A circular target is required even when an elongated reference has been provided. ch_elongated_for_gunzip = ch_reference_to_elongate .filter{ meta, circular_target, circularmapper_elongatedfasta, circularmapper_elongatedindex -> - circular_target != "" || circularmapper_elongatedfasta != "" + circular_target != "" } - .ifEmpty{ error "[nf-core/eager] ERROR: Mapping with circularmapper requires either a circular target or elongated reference file for at least one reference." } + .ifEmpty{ error "[nf-core/eager] ERROR: Mapping with circularmapper requires either a circular target for at least one reference." } // This ELONGATE_REFERENCE subworkflow also checks if the provided reference is gzipped, and unzips it if necessary. ELONGATE_REFERENCE( ch_reference_for_mapping, ch_reference_to_elongate ) diff --git a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf index ba460c49e..f6ec52c00 100644 --- a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf @@ -231,7 +231,7 @@ def validateInputParameters() { if ( params.genotyping_source == 'pmd' && ! 
params.run_pmd_filtering ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'pmd' unless PMD-filtering is ran.") } if ( params.genotyping_source == 'rescaled' && ! params.run_mapdamage_rescaling ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'rescaled' unless aDNA damage rescaling is ran.") } if ( params.fasta && params.run_genotyping && params.genotyping_tool == 'pileupcaller' && ! (params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile ) ) { exit 1, ("[nf-core/eager] ERROR: Genotyping with pileupcaller requires both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' to be provided.") } - if ( params.fasta && params.mapping_tool == "circularmapper" && ! params.fasta_circular_target && ! params.fasta_circularmapper_elongatedfasta ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires either --fasta_circular_target or --fasta_circularmapper_elongatedfasta. ") } + if ( params.fasta && params.mapping_tool == "circularmapper" && ! 
params.fasta_circular_target ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires --fasta_circular_target to be provided.") } } // From 5d8476fbfa73545581146912145958e1fc094a9e Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 12 Aug 2024 14:42:58 +0200 Subject: [PATCH 56/61] remove dumps --- subworkflows/local/elongate_reference.nf | 1 - subworkflows/local/map.nf | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/subworkflows/local/elongate_reference.nf b/subworkflows/local/elongate_reference.nf index ab8c8972b..bf020b684 100644 --- a/subworkflows/local/elongate_reference.nf +++ b/subworkflows/local/elongate_reference.nf @@ -96,7 +96,6 @@ workflow ELONGATE_REFERENCE { ignore_me, chr_list, meta, fasta, fai, dict, mapindex -> [ meta, chr_list ] } - .dump(tag: "collected_files", pretty:true) // Elongate references that need it // Join the original references to the branch of needs_elongation, to get the original fasta files, and elongate them. 
diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 43469c9e2..10972a2a7 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -127,8 +127,7 @@ workflow MAP { CIRCULARMAPPER( index, ch_elongated_reference_for_mapping, elongated_chr_list, reads, params.fasta_circularmapper_elongationfactor ) ch_versions = ch_versions.mix ( CIRCULARMAPPER.out.versions ) ch_mapped_lane_bam = CIRCULARMAPPER.out.bam - ch_mapped_lane_bai = CIRCULARMAPPER.out.bai - + ch_mapped_lane_bai = CIRCULARMAPPER.out.bai // [ [ meta ], bai/csi ] } From 302acf3a61f8c52a8208ce10517fd78cb12743e9 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 12 Aug 2024 14:45:07 +0200 Subject: [PATCH 57/61] update manual tests --- docs/development/manual_tests.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index b83ee686f..b1b2e3996 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -299,19 +299,19 @@ nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -d ``` ```bash -## Multiref with circularmapper. reference_sheet_multiref.csv edited to include elongated reference and index from first CM manual test for Mammoth_MT, and remove the human reference (save on runtime). Will still evaluate through reference_indexing_multi. -## Expect: No elongation for Mammoth MT. -## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. (6 files total) +## Multiref with circularmapper. reference_sheet_multiref.csv edited to include elongated reference and index from first CM manual test for Mammoth_MT. +## Expect: No elongation for Mammoth MT. Elongation for hs37d5_chr21-MT reference. 
+## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory PER REFERENCE (3 libraries (from 2 samples) x 2 references x 3 files = 18 files total). +## Also, elongated hs37d5_chr21-MT is not saved, since --save_reference was not specified. But it did get elongated. nextflow run main.nf -profile test_multiref,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_sheet /Users/lamnidis/Software/github/jbv2/eager/data/reference/reference_sheet_multiref.csv --mapping_tool circularmapper --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 ``` ```bash -## Circularmapper with circularfilter. +## Circularmapper with circularfilter, with a provided elongated reference. ## Expect: No elongation for Mammoth MT. -## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. (6 files total) +## Check: 2 bam files together with their CSIs and Flagstats in the `mapping/circularmapper` directory. (6 files total). Ensure files have the @SQ tag of the circular chromosome. 
nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -dump-channels -ansi-log false --fasta_circular_target 'NC_007596.2' --mapping_tool circularmapper --fasta_circularmapper_elongatedfasta data/reference/Mammoth_MT_Krause_500/Mammoth_MT_Krause_500.fasta --fasta_circularmapper_elongatedindex data/reference/Mammoth_MT_Krause_500/bwa --fasta_largeref --mapping_bwaaln_n 0.05 --mapping_bwaaln_k 3 --mapping_circularmapper_circularfilter ``` - ## Host Removal From c89ef24266f32d4799fa6b2dc1b614506deda4dc Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 12 Aug 2024 14:45:30 +0200 Subject: [PATCH 58/61] fix module configuration application --- conf/modules.config | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d944ed993..fc0bf9b1f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -323,6 +323,9 @@ process { withName: SAMTOOLS_INDEX_BAM_INPUT { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + publishDir = [ + enabled: false + ] } // @@ -561,21 +564,31 @@ process { // Circular mapping // Configuration for BWA_ALN and BWA_SAMSE/SAMPE is the same as for the non-circular mapping withName: CIRCULARMAPPER_REALIGNSAMFILE { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { params.mapping_circularmapper_circularfilter ? "-f true -x true" : "" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + publishDir = [ + enabled: false + ] + } + + withName: ".*MAP:CIRCULARMAPPER:FASTQ_ALIGN_BWAALN_ELONGATED:SAMTOOLS_INDEX" { + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false ] } - withName: ".*MAP:FASTQ_ALIGN_BWAALN_ELONGATED:SAMTOOLS_INDEX" { + withName: ".*MAP:CIRCULARMAPPER:SAMTOOLS_INDEX_REALIGNED" { tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false ] + } // From 32fa8b12a12e99fe90df996fbaf6414b48a911a0 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 12 Aug 2024 15:10:07 +0200 Subject: [PATCH 59/61] update parameter name in CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45c6b1106..b9827c75d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: - "-profile test,docker --preprocessing_tool adapterremoval --preprocessing_adapterlist 'https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/adapterremoval/adapterremoval_adapterlist.txt' --sequencing_qc_tool falco --run_genotyping --genotyping_tool 'freebayes' --genotyping_source 'raw'" - "-profile test,docker --mapping_tool bwamem --run_mapdamage_rescaling --run_pmd_filtering --run_trim_bam --run_genotyping --genotyping_tool 'ug' --genotyping_source 'trimmed'" - "-profile test,docker --mapping_tool bowtie2 --damagecalculation_tool mapdamage --damagecalculation_mapdamage_downsample 100 --run_genotyping --genotyping_tool 'hc' --genotyping_source 'raw'" - - "-profile test,docker --mapping_tool circularmapper --skip_preprocessing --convert_inputbam --fasta_circular_target 'NC_007596.2' --mapping_circularmapper_elongation_factor 500" + - "-profile test,docker --mapping_tool circularmapper --skip_preprocessing --convert_inputbam --fasta_circular_target 'NC_007596.2' 
--fasta_circularmapper_elongationfactor 500" - "-profile test_humanbam,docker --run_mtnucratio --run_contamination_estimation_angsd --snpcapture_bed 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' --run_genotyping --genotyping_tool 'pileupcaller' --genotyping_source 'raw'" - "-profile test_humanbam,docker --run_sexdeterrmine --run_genotyping --genotyping_tool 'angsd' --genotyping_source 'raw'" - "-profile test_multiref,docker" ## TODO add damage manipulation here instead once it goes multiref From 753994c760945213f4f552b6024981d231ca8f2d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 16 Aug 2024 10:08:48 +0200 Subject: [PATCH 60/61] Apply suggestions from code review --- nextflow_schema.json | 3 ++- subworkflows/local/reference_indexing.nf | 4 ++-- subworkflows/local/utils_nfcore_eager_pipeline/main.nf | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 67b97576f..ca823548d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -628,7 +628,8 @@ "type": "boolean", "fa_icon": "fas fa-filter", "description": "Turn on to remove reads that did not map to the circularised genome.", - "help_text": "If you want to filter out reads that don't map to a circular chromosome (and also non-circular chromosome headers) from the resulting BAM file, turn this on.\n\n> Modifies `-f` and `-x` parameters of CircularMapper's RealignSAMFile" + "help_text": "If you want to filter out reads that don't map to + the elongated/circularised chromosome (and also non-circular chromosome headers) from the resulting BAM file, turn this on.\n\n> Modifies `-f` and `-x` parameters of CircularMapper's RealignSAMFile" } }, "fa_icon": "fas fa-layer-group" diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 29e768ba8..0080c75cf 100644 --- a/subworkflows/local/reference_indexing.nf +++ 
b/subworkflows/local/reference_indexing.nf @@ -27,7 +27,7 @@ workflow REFERENCE_INDEXING { // If input (multi-)reference sheet supplied REFERENCE_INDEXING_MULTI ( fasta ) ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference - ch_reference_to_elongate = REFERENCE_INDEXING_MULTI.out.elongated_reference + ch_reference_to_elongate = REFERENCE_INDEXING_MULTI.out.elongated_reference ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_MULTI.out.pmd_masked_fasta @@ -41,7 +41,7 @@ workflow REFERENCE_INDEXING { } else { // If input FASTA and/or indicies supplied REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) - ch_reference_to_elongate = REFERENCE_INDEXING_SINGLE.out.elongated_reference + ch_reference_to_elongate = REFERENCE_INDEXING_SINGLE.out.elongated_reference ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap ch_pmd_masked_fasta = REFERENCE_INDEXING_SINGLE.out.pmd_masked_fasta diff --git a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf index f6ec52c00..2d076cf17 100644 --- a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf @@ -231,7 +231,7 @@ def validateInputParameters() { if ( params.genotyping_source == 'pmd' && ! params.run_pmd_filtering ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'pmd' unless PMD-filtering is ran.") } if ( params.genotyping_source == 'rescaled' && ! params.run_mapdamage_rescaling ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'rescaled' unless aDNA damage rescaling is ran.") } if ( params.fasta && params.run_genotyping && params.genotyping_tool == 'pileupcaller' && ! 
(params.genotyping_pileupcaller_bedfile || params.genotyping_pileupcaller_snpfile ) ) { exit 1, ("[nf-core/eager] ERROR: Genotyping with pileupcaller requires both '--genotyping_pileupcaller_bedfile' AND '--genotyping_pileupcaller_snpfile' to be provided.") } - if ( params.fasta && params.mapping_tool == "circularmapper" && ! params.fasta_circular_target ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires --fasta_circular_target to be provided.") } + if ( params.fasta && params.mapping_tool == "circularmapper" && !params.fasta_circular_target ) { exit 1, ("[nf-core/eager] ERROR: Mapping with circularmapper requires --fasta_circular_target to be provided.") } } // From 81784cb24c538956efb895dc1a02e5166827f75a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 16 Aug 2024 10:11:24 +0200 Subject: [PATCH 61/61] Fix linting --- nextflow_schema.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index ca823548d..dd252a9aa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -628,8 +628,7 @@ "type": "boolean", "fa_icon": "fas fa-filter", "description": "Turn on to remove reads that did not map to the circularised genome.", - "help_text": "If you want to filter out reads that don't map to - the elongated/circularised chromosome (and also non-circular chromosome headers) from the resulting BAM file, turn this on.\n\n> Modifies `-f` and `-x` parameters of CircularMapper's RealignSAMFile" + "help_text": "If you want to filter out reads that don't map to the elongated/circularised chromosome (and also non-circular chromosome headers) from the resulting BAM file, turn this on.\n\n> Modifies `-f` and `-x` parameters of CircularMapper's RealignSAMFile" } }, "fa_icon": "fas fa-layer-group"