-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New module/bwamem2 decont nobams (#34)
* branch test * Decont module no bam generation * Module test added
- Loading branch information
Showing
5 changed files
with
208 additions
and
0 deletions.
There are no files selected for viewing
11 changes: 11 additions & 0 deletions
11
modules/ebi-metagenomics/bwamem2decontnobams/environment.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Conda environment used by the bwamem2decontnobams module | ||
name: bwamem2decontnobams | ||
|
||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
|
||
# Exact version pins: bwa-mem2 is the aligner and samtools filters/converts its output; | ||
# htslib is pinned alongside samtools. | ||
dependencies: | ||
- bwa-mem2=2.2.1 | ||
- htslib=1.19.1 | ||
- samtools=1.19.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// Decontamination step: maps reads against a bwa-mem2 index and streams the reads that | ||
// did NOT map straight into gzipped FASTQ files; no BAM file is kept as an output. | ||
process BWAMEM2DECONTNOBAMS { | ||
tag "$meta.id" | ||
label 'process_high' | ||
|
||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' : | ||
'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }" | ||
|
||
|
||
input: | ||
// meta: sample map; meta.id tags the task and meta.single_end selects the branch in the script | ||
tuple val(meta), path(reads) | ||
// index: staged bwa-mem2 index files; the index prefix is recovered from the *.amb file | ||
tuple val(meta2), path(index) | ||
|
||
output: | ||
// <prefix>_1/_2.fq.gz for paired-end input, <prefix>_interleaved.fq.gz for single-end input | ||
tuple val(meta), path("*{_1,_2,_interleaved}.fq.gz"), emit: decont_reads | ||
path "versions.yml", emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
// ext.args is passed to `bwa-mem2 mem`; it is empty by default, which leaves the command unchanged | ||
def args = task.ext.args ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
// samtools view -f 4 -F 256 keeps unmapped reads and drops secondary alignments; | ||
// the name sort (-n) puts mates next to each other before the FASTQ conversion. | ||
// Paired-end: mates go to _1/_2, everything else (-0, -s) is discarded to /dev/null. | ||
""" | ||
INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` | ||
if [[ "${meta.single_end}" == "true" ]]; then | ||
bwa-mem2 \\ | ||
mem \\ | ||
-M \\ | ||
$args \\ | ||
-t $task.cpus \\ | ||
\$INDEX \\ | ||
$reads \\ | ||
| samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\ | ||
| samtools sort -@ ${task.cpus} -n -O bam - \\ | ||
| samtools bam2fq -@ ${task.cpus} - | gzip --no-name > ${prefix}_interleaved.fq.gz | ||
else | ||
bwa-mem2 \\ | ||
mem \\ | ||
-M \\ | ||
$args \\ | ||
-t $task.cpus \\ | ||
\$INDEX \\ | ||
$reads \\ | ||
| samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\ | ||
| samtools sort -@ ${task.cpus} -n -O bam - \\ | ||
| samtools bam2fq -@ ${task.cpus} -1 ${prefix}_1.fq.gz -2 ${prefix}_2.fq.gz -0 /dev/null -s /dev/null - | ||
fi | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
bwa-mem2: \$(bwa-mem2 version 2> /dev/null) | ||
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') | ||
END_VERSIONS | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
# Module metadata: describes the tools, inputs and outputs of the bwamem2decontnobams process | ||
name: "bwamem2decontnobams" | ||
description: Decontamination module using bwamem2 and samtools that generates fastq files on the fly | ||
keywords: | ||
- alignment | ||
- decontamination | ||
- fastq | ||
tools: | ||
- bwamem2: | ||
description: "Mapping DNA sequences against a large reference genome" | ||
tool_dev_url: "https://github.com/bwa-mem2/bwa-mem2" | ||
- samtools: | ||
description: "Tools for dealing with SAM, BAM and CRAM files" | ||
documentation: "http://www.htslib.org/doc/samtools.html" | ||
|
||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1', single_end:false ]` | ||
- reads: | ||
type: file | ||
description: | | ||
List of input FastQ files of size 1 and 2 | ||
for single-end and paired-end data, respectively | ||
- meta2: | ||
type: map | ||
description: | | ||
Groovy Map containing reference genome information | ||
e.g. `[ id:'ref_name' ]` | ||
- index: | ||
type: file | ||
description: | | ||
A list of BWA index files | ||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1', single_end:false ]` | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
# Pattern mirrors the glob declared on the process output | ||
- decont_reads: | ||
type: file | ||
description: | | ||
List of fastq files. Two files for paired-end reads and one file for single-end reads | ||
pattern: "*{_1,_2,_interleaved}.fq.gz" | ||
authors: | ||
- "@EBI-metagenomics" | ||
maintainers: | ||
- "@EBI-metagenomics" |
84 changes: 84 additions & 0 deletions
84
modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// nf-test suite for the BWAMEM2DECONTNOBAMS process: runs paired-end and single-end | ||
// decontamination against the MGYG000317500 index and checks the size of the emitted FASTQ files. | ||
nextflow_process { | ||
|
||
name "Test module bwamem2decontnobams" | ||
script "../main.nf" | ||
process "BWAMEM2DECONTNOBAMS" | ||
|
||
tag "modules" | ||
tag "modules_nfcore" | ||
tag "bwamem2decontnobams" | ||
|
||
test("Illumina paired_end decontamination with MGYG000317500") { | ||
when { | ||
process { | ||
""" | ||
// Define inputs of the process: | ||
input[0] = Channel.of([ | ||
[ id: "test", single_end: false ], | ||
[ | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R1.fastq.gz", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R2.fastq.gz", checkIfExists: true) | ||
] | ||
]) | ||
input[1] = Channel.of([ | ||
[ id: "MGYG000317500" ], | ||
[ | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.0123", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.amb", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.ann", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.bwt.2bit.64", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.pac", checkIfExists: true) | ||
] | ||
]) | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
// gzip stores extra information in the header, which makes comparing checksums impossible between operating systems. | ||
// that is why we use the sizes of files, and that sort of thing | ||
{ assert path(process.out.decont_reads.get(0).get(1).get(0)).linesGzip.size() == 374028 }, | ||
{ assert path(process.out.decont_reads.get(0).get(1).get(1)).linesGzip.size() == 374028 } | ||
) | ||
} | ||
} | ||
|
||
|
||
test("Illumina single_end decontamination with MGYG000317500") { | ||
when { | ||
process { | ||
""" | ||
// Define inputs of the process: | ||
input[0] = Channel.of([ | ||
[ id: "test", single_end: true ], | ||
[ | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/test_R1.fastq.gz", checkIfExists: true) | ||
] | ||
]) | ||
|
||
input[1] = Channel.of([ | ||
[ id: "MGYG000317500" ], | ||
[ | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.0123", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.amb", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.ann", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.bwt.2bit.64", checkIfExists: true), | ||
file("${baseDir}/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/data/MGYG000317500.fna.pac", checkIfExists: true) | ||
] | ||
]) | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
// single-end input emits a single FASTQ path, so no inner index is needed | ||
{ assert path(process.out.decont_reads.get(0).get(1)).linesGzip.size() == 378312 } | ||
) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# NOTE(review): presumably maps the test tag to the paths whose changes trigger it — confirm | ||
bwamem2decontnobams: | ||
- modules/ebi-metagenomics/bwamem2decontnobams/** |