Skip to content

Commit

Permalink
New module/bwamem2 decont nobams (#34)
Browse files Browse the repository at this point in the history
* branch test
* Decont module no bam generation
* Module test added
  • Loading branch information
Ales-ibt authored Apr 9, 2024
1 parent ff19dc0 commit fa97e0d
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 0 deletions.
11 changes: 11 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Conda environment definition for the bwamem2decontnobams module.
name: bwamem2decontnobams

# Channels searched for the packages listed under `dependencies`.
channels:
  - conda-forge
  - bioconda
  - defaults

# Version-pinned packages installed into the environment.
dependencies:
  - bwa-mem2=2.2.1
  - htslib=1.19.1
  - samtools=1.19.2
55 changes: 55 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
 * BWAMEM2DECONTNOBAMS
 *
 * Maps reads against a bwa-mem2 index and streams the alignments straight
 * through samtools so that only FASTQ output is written (no BAM file is kept).
 *
 * Inputs:
 *   - meta  / reads : sample metadata map and the FASTQ file(s) to map.
 *                     `meta.single_end` selects the single-end or paired-end branch.
 *   - meta2 / index : reference metadata map and the bwa-mem2 index files
 *                     (the index prefix is derived from the staged `*.amb` file).
 *
 * Outputs:
 *   - decont_reads  : `${prefix}_interleaved.fq.gz` (single-end) or
 *                     `${prefix}_1.fq.gz` + `${prefix}_2.fq.gz` (paired-end).
 *   - versions      : versions.yml with the bwa-mem2 and samtools versions.
 *
 * Configuration:
 *   - task.ext.args   : extra command-line options appended to `bwa-mem2 mem`.
 *   - task.ext.prefix : output file prefix (defaults to `meta.id`).
 */
process BWAMEM2DECONTNOBAMS {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' :
        'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }"

    input:
    tuple val(meta), path(reads)
    tuple val(meta2), path(index)

    output:
    tuple val(meta), path("*{_1,_2,_interleaved}.fq.gz"), emit: decont_reads
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Previously `args` was declared but never used, so `ext.args` was silently
    // ignored; it is now forwarded to `bwa-mem2 mem` (empty by default).
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Pipeline shared by both branches:
    //   samtools view -f 4 -F 256 : keep records flagged as unmapped, drop secondary alignments
    //   samtools sort -n          : sort by read name so mates are adjacent for FASTQ export
    // Single-end: all surviving reads are written, gzip-compressed, to one file.
    // Paired-end: READ1/READ2 go to _1/_2; reads with other flag combinations (-0)
    //             and singletons (-s) are discarded via /dev/null.
    """
    INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
    if [[ "${meta.single_end}" == "true" ]]; then
        bwa-mem2 \\
            mem \\
            -M \\
            $args \\
            -t ${task.cpus} \\
            \$INDEX \\
            $reads \\
            | samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\
            | samtools sort -@ ${task.cpus} -n -O bam - \\
            | samtools bam2fq -@ ${task.cpus} - | gzip --no-name > ${prefix}_interleaved.fq.gz
    else
        bwa-mem2 \\
            mem \\
            -M \\
            $args \\
            -t ${task.cpus} \\
            \$INDEX \\
            $reads \\
            | samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\
            | samtools sort -@ ${task.cpus} -n -O bam - \\
            | samtools bam2fq -@ ${task.cpus} -1 ${prefix}_1.fq.gz -2 ${prefix}_2.fq.gz -0 /dev/null -s /dev/null
    fi
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bwa-mem2: \$(bwa-mem2 version 2> /dev/null)
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
56 changes: 56 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata: describes the tools used and the input/output channels of
# the BWAMEM2DECONTNOBAMS process.
name: "bwamem2decontnobams"
description: Decontamination module using bwamem2 and samtools that generates fastq files on the fly
keywords:
  - alignment
  - decontamination
  - fastq
tools:
  - bwamem2:
      description: "Mapping DNA sequences against a large reference genome"
      tool_dev_url: "https://github.com/bwa-mem2/bwa-mem2"
  - samtools:
      description: "Tools for dealing with SAM, BAM and CRAM files"
      # Unversioned manual page; the previous link pointed at the 1.1 manual.
      documentation: "http://www.htslib.org/doc/samtools.html"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2
        for single-end and paired-end data, respectively
  - meta2:
      type: map
      description: |
        Groovy Map containing reference genome information
        e.g. `[ id:'ref_name' ]`
  - index:
      type: file
      description: |
        A list of BWA index files
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - decont_reads:
      type: file
      description: |
        List of fastq files. Two files for paired-end reads and one file for single-end reads
      # Same glob as the `decont_reads` output declared in main.nf.
      pattern: "*{_1,_2,_interleaved}.fq.gz"
authors:
  - "@EBI-metagenomics"
maintainers:
  - "@EBI-metagenomics"
84 changes: 84 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// nf-test suite for the BWAMEM2DECONTNOBAMS process.
// The unit under test is a process, so the process DSL is used
// (`nextflow_process` / `process` / `process.out`) instead of the workflow DSL.
nextflow_process {

    name "Test module bwamem2decontnobams"
    script "../main.nf"
    process "BWAMEM2DECONTNOBAMS"

    tag "modules"
    tag "modules_nfcore"
    tag "bwamem2decontnobams"

    // Paired-end input: expects two output files (_1 and _2) with equal line counts.
    test("Illumina paired_end decontamination with MGYG000317500") {
        when {
            process {
                """
                // Define inputs of the process:
                input[0] = Channel.of([
                    [ id: "test", single_end: false ],
                    [
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R1.fastq.gz", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R2.fastq.gz", checkIfExists: true)
                    ]
                ])
                input[1] = Channel.of([
                    [ id: "MGYG000317500" ],
                    [
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.0123", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.amb", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.ann", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.bwt.2bit.64", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.pac", checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // gzip stores extra information in the header, which makes comparing checksums impossible between operating systems.
                // That is why the number of decompressed lines is compared instead of a checksum.
                { assert path(process.out.decont_reads.get(0).get(1).get(0)).linesGzip.size() == 374028 },
                { assert path(process.out.decont_reads.get(0).get(1).get(1)).linesGzip.size() == 374028 }
            )
        }
    }

    // Single-end input: expects one output file, so the emitted path is not a list.
    test("Illumina single_end decontamination with MGYG000317500") {
        when {
            process {
                """
                // Define inputs of the process:
                input[0] = Channel.of([
                    [ id: "test", single_end: true ],
                    [
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R1.fastq.gz", checkIfExists: true)
                    ]
                ])

                input[1] = Channel.of([
                    [ id: "MGYG000317500" ],
                    [
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.0123", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.amb", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.ann", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.bwt.2bit.64", checkIfExists: true),
                        file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.pac", checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert path(process.out.decont_reads.get(0).get(1)).linesGzip.size() == 378312 }
            )
        }
    }
}
2 changes: 2 additions & 0 deletions modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the module tag to the file glob covering this module's directory.
bwamem2decontnobams:
  - modules/ebi-metagenomics/bwamem2decontnobams/**

0 comments on commit fa97e0d

Please sign in to comment.