Skip to content

Commit

Permalink
New module: kraken2/build (#5212)
Browse files Browse the repository at this point in the history
* first commit

* use mkdir -p flag

* update assertions

* Update modules/nf-core/kraken2/build/main.nf

Co-authored-by: James A. Fellows Yates <[email protected]>

* update dependencies

* add args_id

* option for cleaning files

* Update modules/nf-core/kraken2/build/main.nf

Co-authored-by: James A. Fellows Yates <[email protected]>

* remove trailing whitespace

* remove trailing whitespace

* check prefix and database name

* Add slightly relaxed assertions for opts.k2d

* Change unmapped as well due to occasional variation

---------

Co-authored-by: James A. Fellows Yates <[email protected]>
  • Loading branch information
alxndrdiaz and jfy133 authored Mar 22, 2024
1 parent 7afd02d commit db87ca5
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 0 deletions.
10 changes: 10 additions & 0 deletions modules/nf-core/kraken2/build/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the kraken2/build module.
name: kraken2_build
# Channels are listed in priority order.
channels:
  - conda-forge
  - bioconda
  - defaults
# Pinned tool versions.
dependencies:
  - bioconda::kraken2=2.1.3
  - coreutils=9.4
51 changes: 51 additions & 0 deletions modules/nf-core/kraken2/build/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Builds a Kraken2 database in-place inside the staged `db` directory with
// `kraken2-build --build`, optionally removes intermediate files with
// `kraken2-build --clean`, and emits the result as a directory named `prefix`.
//
// Inputs:
//   meta     - Groovy map with sample information; `meta.id` is the default prefix
//   db       - directory handed to `kraken2-build --db`
//   cleaning - when truthy, `kraken2-build --clean` is run after the build
// Outputs:
//   db       - tuple of meta and the directory named `prefix`
//   versions - versions.yml holding the reported kraken2 version
process KRAKEN2_BUILD {
    tag "$meta.id"
    label 'process_medium'
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-f8c4015c836dd3ce5c118cfed97ec8259bab9e9d:2e0b144854b4a3d69b5df7a0340a60db846cc8bf-0':
        'biocontainers/mulled-v2-f8c4015c836dd3ce5c118cfed97ec8259bab9e9d:2e0b144854b4a3d69b5df7a0340a60db846cc8bf-0' }"

    input:
    tuple val(meta), path(db)
    val cleaning

    output:
    tuple val(meta), path("$prefix"), emit: db
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately NOT declared with `def`: the output block above
    // resolves "$prefix" and therefore needs it outside this local scope.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Empty string when cleaning is disabled, so the line expands to nothing.
    def runclean = cleaning ? "kraken2-build --clean --db ${db}" : ""
    """
    kraken2-build \\
        --build \\
        $args \\
        --threads ${task.cpus} \\
        --db ${db}
    $runclean
    if [[ \$(basename ${db}) != "${prefix}" ]]; then
        # The target directory must exist before moving several files into it;
        # otherwise mv fails (or renames a lone file to the prefix).
        mkdir -p "${prefix}"
        mv ${db}/* "${prefix}"
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    mkdir -p "$prefix"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
    END_VERSIONS
    """
}
48 changes: 48 additions & 0 deletions modules/nf-core/kraken2/build/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Module metadata for kraken2/build: describes the wrapped tool and the
# process inputs and outputs.
name: kraken2_build
description: Builds Kraken2 database
keywords:
  - metagenomics
  - db
  - classification
  - build
  - kraken2
tools:
  - kraken2:
      description: "Kraken2 is a system for assigning taxonomic labels to short DNA sequences, usually obtained through metagenomic studies."
      homepage: https://ccb.jhu.edu/software/kraken2/
      documentation: https://github.com/DerrickWood/kraken2/wiki/Manual
      tool_dev_url: https://github.com/DerrickWood/kraken2
      doi: 10.1186/s13059-019-1891-0
      licence:
        - MIT
      args_id: "$args"
# Process inputs, in declaration order.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - db:
      type: directory
      description: contains required files to build the database
      pattern: "*/"
  - cleaning:
      type: boolean
      description: activate or deactivate (true or false) cleaning of intermediate files
# Process outputs.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - db:
      type: directory
      description: contains the database that can be used to perform taxonomic classification
      pattern: "*/"
  - versions:
      type: file
      description: File containing software versions
      pattern: versions.yml
authors:
  - "@alxndrdiaz"
maintainers:
  - "@alxndrdiaz"
101 changes: 101 additions & 0 deletions modules/nf-core/kraken2/build/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// nf-test suite for KRAKEN2_BUILD.
// The setup chain runs GUNZIP and then KRAKEN2_ADD; the KRAKEN2_ADD `db`
// output is what both tests feed to the process under test.
nextflow_process {

    name "Test Process KRAKEN2_BUILD"
    script "../main.nf"
    process "KRAKEN2_BUILD"
    config "./nextflow.config"

    // Generic tags first, then the module itself and its setup dependencies.
    tag "modules"
    tag "modules_nfcore"
    tag "kraken2"
    tag "kraken2/build"
    tag "kraken2/add"
    tag "gunzip"

    setup {

        // Decompress the accession2taxid file consumed by KRAKEN2_ADD below.
        run("GUNZIP") {
            script "modules/nf-core/gunzip/main.nf"
            process {
                """
                input[0] = Channel.of([
                    [],
                    file(params.test_data['sarscov2']['metagenome']['prot_accession2taxid_gz'], checkIfExists: true)
                ])
                """
            }
        }

        // Produce the database directory that KRAKEN2_BUILD receives as input[0].
        run("KRAKEN2_ADD") {
            script "modules/nf-core/kraken2/add/main.nf"
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    [
                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                        file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true)
                    ]
                ]
                input[1] = file(params.test_data['sarscov2']['metagenome']['prot_names_dmp'], checkIfExists: true)
                input[2] = file(params.test_data['sarscov2']['metagenome']['prot_nodes_dmp'], checkIfExists: true)
                input[3] = GUNZIP.out.gunzip.map{ it[1] }
                """
            }
        }
    }

    test("sarscov2 protein_db") {

        when {
            process {
                """
                input[0] = KRAKEN2_ADD.out.db
                input[1] = true
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // The emitted directory must be named after the prefix ('test').
                { assert process.out.db.get(0).get(1) ==~ ".*/test" },
                // hash.k2d and taxo.k2d are snapshotted via path(); for opts.k2d
                // and unmapped.txt only the file name is recorded.
                {
                    assert snapshot(
                        path("${process.out.db[0][1]}/hash.k2d"),
                        path("${process.out.db[0][1]}/taxo.k2d"),
                        file("${process.out.db[0][1]}/opts.k2d").name,
                        file("${process.out.db[0][1]}/unmapped.txt").name
                    ).match()
                }
            )
        }
    }

    test("sarscov2 protein_db stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = KRAKEN2_ADD.out.db
                input[1] = true
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
52 changes: 52 additions & 0 deletions modules/nf-core/kraken2/build/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions modules/nf-core/kraken2/build/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Test configuration for the kraken2/build module.
process {

    // Publish under <outdir>/<tool>, where <tool> is the lower-cased first
    // '_'-separated token of the last ':'-separated part of the process name.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolName   = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolName}"
    }

    // Extra arguments passed to KRAKEN2_BUILD through task.ext.args.
    withName: 'KRAKEN2_BUILD' {
        ext.args = '--protein'
    }
}
4 changes: 4 additions & 0 deletions modules/nf-core/kraken2/build/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Path globs associated with the kraken2/build tag: the module itself plus
# the modules its test setup runs.
kraken2/build:
  - modules/nf-core/kraken2/build/**
  - modules/nf-core/kraken2/add/**
  - modules/nf-core/gunzip/**

0 comments on commit db87ca5

Please sign in to comment.