diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/environment.yml b/modules/ebi-metagenomics/bwamem2decontnobams/environment.yml
new file mode 100644
index 00000000..5e236a75
--- /dev/null
+++ b/modules/ebi-metagenomics/bwamem2decontnobams/environment.yml
@@ -0,0 +1,11 @@
+name: bwamem2decontnobams
+
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+
+dependencies:
+  - bwa-mem2=2.2.1
+  - htslib=1.19.1
+  - samtools=1.19.2
diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/main.nf b/modules/ebi-metagenomics/bwamem2decontnobams/main.nf
new file mode 100644
index 00000000..87deb990
--- /dev/null
+++ b/modules/ebi-metagenomics/bwamem2decontnobams/main.nf
@@ -0,0 +1,63 @@
+// Decontaminate reads: align against a bwa-mem2 index and stream the unmapped
+// reads straight to gzipped FASTQ with samtools, without writing a BAM file.
+process BWAMEM2DECONTNOBAMS {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' :
+        'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }"
+
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(meta2), path(index)
+
+    output:
+    tuple val(meta), path("*{_1,_2,_interleaved}.fq.gz"), emit: decont_reads
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Extra `bwa-mem2 mem` options supplied via `task.ext.args` (empty by default).
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Index prefix = path of the staged .amb file without its extension.
+    INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
+    if [[ "${meta.single_end}" == "true" ]]; then
+        # Single-end: keep unmapped, non-secondary reads (-f 4 -F 256) in one FASTQ.
+        bwa-mem2 \\
+            mem \\
+            -M \\
+            $args \\
+            -t $task.cpus \\
+            \$INDEX \\
+            $reads \\
+            | samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\
+            | samtools sort -@ ${task.cpus} -n -O bam - \\
+            | samtools bam2fq -@ $task.cpus - | gzip --no-name > ${prefix}_interleaved.fq.gz
+    else
+        # Paired-end: same filter; mates go to _1/_2, all other reads are dropped.
+        bwa-mem2 \\
+            mem \\
+            -M \\
+            $args \\
+            -t $task.cpus \\
+            \$INDEX \\
+            $reads \\
+            | samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\
+            | samtools sort -@ ${task.cpus} -n -O bam - \\
+            | samtools bam2fq -@ ${task.cpus} -1 ${prefix}_1.fq.gz -2 ${prefix}_2.fq.gz -0 /dev/null -s /dev/null
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bwa-mem2: \$(bwa-mem2 version 2> /dev/null)
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/meta.yml b/modules/ebi-metagenomics/bwamem2decontnobams/meta.yml
new file mode 100644
index 00000000..45cf782f
--- /dev/null
+++ b/modules/ebi-metagenomics/bwamem2decontnobams/meta.yml
@@ -0,0 +1,56 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "bwamem2decontnobams"
+description: Decontamination module using bwamem2 and samtools that generates fastq files on the fly
+keywords:
+  - alignment
+  - decontamination
+  - fastq
+tools:
+  - bwamem2:
+      description: "Mapping DNA sequences against a large reference genome"
+      tool_dev_url: "https://github.com/bwa-mem2/bwa-mem2"
+  - samtools:
+      description: "Tools for dealing with SAM, BAM and CRAM files"
+      documentation: "http://www.htslib.org/doc/1.1/samtools.html"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2
+        for single-end and paired-end data, respectively
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference genome information
+        e.g. [ id:'ref_name' ]
+  - index:
+      type: file
+      description: |
+        A list of BWA index files
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - decont_reads:
+      type: file
+      description: |
+        List of fastq files. Two files for paired-end reads and one file for single-end reads
+
+authors:
+  - "@EBI-metagenomics"
+maintainers:
+  - "@EBI-metagenomics"
diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test b/modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test
new file mode 100644
index 00000000..03114b12
--- /dev/null
+++ b/modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test
@@ -0,0 +1,85 @@
+// nf-test suite for the BWAMEM2DECONTNOBAMS process: one paired-end and one single-end case.
+nextflow_process {
+
+    name "Test module bwamem2decontnobams"
+    script "../main.nf"
+    process "BWAMEM2DECONTNOBAMS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "bwamem2decontnobams"
+
+    test("Illumina paired_end decontamination with MGYG000317500") {
+        when {
+            process {
+                """
+                // Define inputs of the process:
+                input[0] = Channel.of([
+                    [ id: "test", single_end: false ],
+                    [
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R1.fastq.gz", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R2.fastq.gz", checkIfExists: true)
+                    ]
+                ])
+                input[1] = Channel.of([
+                    [ id: "MGYG000317500" ],
+                    [
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.0123", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.amb", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.ann", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.bwt.2bit.64", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.pac", checkIfExists: true)
+                    ]
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                // gzip stores extra information in the header, which makes comparing checksums impossible between operating systems.
+                // That is why the decompressed line counts are compared instead of file checksums.
+                { assert path(process.out.decont_reads.get(0).get(1).get(0)).linesGzip.size() == 374028 },
+                { assert path(process.out.decont_reads.get(0).get(1).get(1)).linesGzip.size() == 374028 }
+            )
+        }
+    }
+
+
+    test("Illumina single_end decontamination with MGYG000317500") {
+        when {
+            process {
+                """
+                // Define inputs of the process:
+                input[0] = Channel.of([
+                    [ id: "test", single_end: true ],
+                    [
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R1.fastq.gz", checkIfExists: true)
+                    ]
+                ])
+
+                input[1] = Channel.of([
+                    [ id: "MGYG000317500" ],
+                    [
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.0123", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.amb", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.ann", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.bwt.2bit.64", checkIfExists: true),
+                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.pac", checkIfExists: true)
+                    ]
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.decont_reads.get(0).get(1)).linesGzip.size() == 378312 }
+            )
+        }
+    }
+}
diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml b/modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml
new file mode 100644
index 00000000..7c794e06
--- /dev/null
+++ b/modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml
@@ -0,0 +1,2 @@
+bwamem2decontnobams:
+  - modules/ebi-metagenomics/bwamem2decontnobams/**