Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor checkm2/databasedownload using aria2 #6654

Merged
merged 11 commits into from
Sep 25, 2024
2 changes: 1 addition & 1 deletion modules/nf-core/checkm2/databasedownload/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- bioconda::checkm2=1.0.2
- conda-forge::aria2=1.36.0
31 changes: 18 additions & 13 deletions modules/nf-core/checkm2/databasedownload/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ import groovy.json.JsonSlurper
process CHECKM2_DATABASEDOWNLOAD {
label 'process_single'

conda "bioconda::checkm2=1.0.2"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/checkm2:1.0.2--pyh7cba7a3_0':
'biocontainers/checkm2:1.0.2--pyh7cba7a3_0' }"
'https://depot.galaxyproject.org/singularity/aria2:1.36.0':
'biocontainers/aria2:1.36.0' }"

input:
val(db_zenodo_id)

output:
tuple val(meta), path("checkm2_db_v${db_version}.dmnd"), emit: database
Expand All @@ -16,25 +19,27 @@ process CHECKM2_DATABASEDOWNLOAD {
task.ext.when == null || task.ext.when

script:
zenodo_id = 5571251
def jsonSlurper = new JsonSlurper()
db_version = jsonSlurper.parseText(file("https://zenodo.org/api/records/${zenodo_id}").text).metadata.version
meta = [id: 'checkm2_db', version: db_version]
def args = task.ext.args ?: ''
zenodo_id = db_zenodo_id ?: 5571251 // Default to latest version if no ID provided
api_data = (new JsonSlurper()).parseText(file("https://zenodo.org/api/records/${zenodo_id}").text)
db_version = api_data.metadata.version
checksum = api_data.files[0].checksum.replaceFirst(/^md5:/, "md5=")
meta = [id: 'checkm2_db', version: db_version]
"""
# Automatic download is broken when using singularity/apptainer (https://github.com/chklovski/CheckM2/issues/73)
# So we download the database manually
wget https://zenodo.org/records/${zenodo_id}/files/checkm2_database.tar.gz
# So it's necessary to download the database manually
aria2c \
${args} \
--checksum ${checksum} \
https://zenodo.org/records/${zenodo_id}/files/checkm2_database.tar.gz

tar -xzf checkm2_database.tar.gz
db_path=\$(find -name *.dmnd)
MD5=\$(grep -o '\\.dmnd": "[^"]*"' CONTENTS.json | cut -d '"' -f 3)

md5sum -c <<< "\$MD5 \$db_path"
mv \$db_path checkm2_db_v${db_version}.dmnd

cat <<-END_VERSIONS > versions.yml
"${task.process}":
checkm2: \$(checkm2 --version)
aria2: \$(echo \$(aria2c --version 2>&1) | grep 'aria2 version' | cut -f3 -d ' ')
END_VERSIONS
"""

Expand Down
5 changes: 5 additions & 0 deletions modules/nf-core/checkm2/databasedownload/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ tools:
licence: ["GPL v3"]
identifier: ""

input:
- - db_zenodo_id:
type: integer
description: Zenodo record ID of the CheckM2 database to download; if left empty, the module falls back to record 5571251

output:
- database:
- meta:
Expand Down
8 changes: 8 additions & 0 deletions modules/nf-core/checkm2/databasedownload/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ nextflow_process {

test("Test CheckM2 Database Download") {

// Supply an empty value for the module's single input (db_zenodo_id).
// An empty list is falsy in Groovy, so the module's `db_zenodo_id ?: 5571251`
// fallback selects its built-in default Zenodo record.
when {
process {
"""
input[0] = []
"""
}
}

then {
assertAll(
{ assert process.success },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
"Test CheckM2 Database Download": {
"content": [
[
"versions.yml:md5,a0f7b47476ffc62ce27870f0503e6c04"
"versions.yml:md5,6201d5ac7aca6e32b98daf4f8656aa2a"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-12T06:29:33.222099893"
"timestamp": "2024-09-16T22:23:54.183040031"
}
}
2 changes: 1 addition & 1 deletion modules/nf-core/checkm2/predict/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process CHECKM2_PREDICT {
tag "${meta.id}"
label 'process_medium'

conda "bioconda::checkm2=1.0.2"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/checkm2:1.0.2--pyh7cba7a3_0':
'biocontainers/checkm2:1.0.2--pyh7cba7a3_0' }"
Expand Down
5 changes: 5 additions & 0 deletions modules/nf-core/checkm2/predict/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ nextflow_process {
// Runs CHECKM2_DATABASEDOWNLOAD as a setup step for the tests in this file.
// NOTE(review): presumably its `database` output feeds CHECKM2_PREDICT — confirm
// against the test body, which is not visible in this hunk.
setup {
run("CHECKM2_DATABASEDOWNLOAD") {
script "../../databasedownload/main.nf"
// The download module takes a Zenodo ID input; an empty list is falsy in Groovy,
// so the module's `db_zenodo_id ?: 5571251` fallback picks its default record.
process {
"""
input[0] = []
"""
}
}
}

Expand Down
6 changes: 1 addition & 5 deletions modules/nf-core/checkm2/predict/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@
"versions.yml:md5,088ec2d8a46efd530c11019328064bff"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-12T06:34:56.335651565"
"timestamp": "2024-09-16T22:43:50.787486798"
}
}
Loading