Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove compressed flag from all mmseqs modules #7211

Merged
merged 14 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions modules/nf-core/mmseqs/cluster/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ process MMSEQS_CLUSTER {
${prefix}/${prefix} \\
tmp1 \\
$args \\
--threads ${task.cpus} \\
--compressed 1
--threads ${task.cpus}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
6 changes: 3 additions & 3 deletions modules/nf-core/mmseqs/cluster/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
"test_output_cluster.index"
],
[
"test_output_cluster.dbtype:md5,5c879eb8a8613fd4537b919e7d68d089",
"test_output_cluster.index:md5,ca7e1f0967222b06d7e05e47bcbe1e50"
"test_output_cluster.dbtype:md5,b9d9c6dbc098c97ae446f612efd8eafd",
"test_output_cluster.index:md5,9848b52b6df827d80a04f7c71c50056b"
],
"versions.yml:md5,4acad55952c6d1fb41ad7f5a44468aed"
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.2"
},
"timestamp": "2024-12-05T10:44:45.713270981"
"timestamp": "2024-12-15T19:30:14.704271821"
}
}
3 changes: 1 addition & 2 deletions modules/nf-core/mmseqs/createdb/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ process MMSEQS_CREATEDB {
createdb \\
${sequence_name} \\
${prefix}/${prefix} \\
$args \\
--compressed 1
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
28 changes: 14 additions & 14 deletions modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
"single_end": false
},
[
"test:md5,7c3c2c5926cf8fa82e66b9628f680256",
"test.dbtype:md5,c8ed20c23ba91f4577f84c940c86c7db",
"test.index:md5,5b2fd8abd0ad3fee24738af7082e6a6e",
"test:md5,a2cda8768736a7a317a09d61556194bd",
"test.dbtype:md5,4352d88a78aa39750bf70cd6f27bcaa5",
"test.index:md5,4ba298b011e2472ce9f6b99fe6b6e3d5",
"test.lookup:md5,32f88756dbcb6aaf7b239b0d61730f1b",
"test.source:md5,9ada5b3ea6e1a7e16c4418eb98ae8d9d",
"test_h:md5,8c29f5ed94d83d7115e9c8a883ce358d",
"test_h.dbtype:md5,8895d3d8e9322aedbf45249dfb3ddb0a",
"test_h.index:md5,87c7c8c6d16018ebfaa6f408391a5ae2"
"test_h:md5,21c399702a071bdeecce09f9d1df4531",
"test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2",
"test_h.index:md5,d767fb43b37c0a644c676b00f9f93477"
]
]
],
Expand All @@ -27,7 +27,7 @@
"nf-test": "0.9.2",
"nextflow": "24.10.2"
},
"timestamp": "2024-12-05T10:35:27.322864475"
"timestamp": "2024-12-15T19:30:25.339409238"
},
"Should build an mmseqs db from a zipped amino acid sequence file": {
"content": [
Expand All @@ -37,14 +37,14 @@
"id": "test"
},
[
"test:md5,4b494965ed7ab67da8ca3f39523eb104",
"test.dbtype:md5,152afd7bf4dbe26f85032eee0269201a",
"test.index:md5,46f9d884e9a7f442fe1cd2ce339734e3",
"test:md5,1162504bc65aacf734abdcb0cdbe87de",
"test.dbtype:md5,f1d3ff8443297732862df21dc4e57262",
"test.index:md5,8cdcbc06c2b99fdb09f3d1735a76def9",
"test.lookup:md5,3e27cb93d9ee875ad42a6f32f5651bdc",
"test.source:md5,eaa64fc8a5f7ec1ee49b0dcbd1a72e9d",
"test_h:md5,6e798b81c70d191f78939c2dd6223a7f",
"test_h.dbtype:md5,8895d3d8e9322aedbf45249dfb3ddb0a",
"test_h.index:md5,d5ac49ff56df064b980fa0eb5da57673"
"test_h:md5,f258f8cc04f83c270a75e8b00a6d2d89",
"test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2",
"test_h.index:md5,844bf1950bcd37284fdc5d7117ee4241"
]
]
],
Expand All @@ -56,6 +56,6 @@
"nf-test": "0.9.2",
"nextflow": "24.10.2"
},
"timestamp": "2024-12-05T10:35:33.418552595"
"timestamp": "2024-12-15T19:30:34.93361715"
}
}
20 changes: 10 additions & 10 deletions modules/nf-core/mmseqs/createindex/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@ process MMSEQS_CREATEINDEX {
label 'process_high'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0':
'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0'
: 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0'}"

input:
tuple val(meta), path(db)

output:
tuple val(meta), path(db) , emit: db_indexed
path "versions.yml" , emit: versions
tuple val(meta), path(db), emit: db_indexed
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -23,15 +23,14 @@ process MMSEQS_CREATEINDEX {
def prefix = task.ext.prefix ?: "${meta.id}"

"""
DB_INPUT_PATH_NAME=\$(find -L "$db/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )
DB_INPUT_PATH_NAME=\$(find -L "${db}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )

mmseqs \\
createindex \\
\${DB_INPUT_PATH_NAME} \\
tmp1 \\
$args \\
--threads ${task.cpus} \\
--compressed 1
${args} \\
--threads ${task.cpus}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand All @@ -40,8 +39,9 @@ process MMSEQS_CREATEINDEX {
"""

stub:
def args2 = task.ext.args2 ?: "*.dbtype"
"""
DB_INPUT_PATH_NAME=\$(find -L "$db/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )
DB_INPUT_PATH_NAME=\$(find -L "${db}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )

# Create the placeholder index next to the database located above; DB_PATH_NAME is never assigned in this stub.
touch "\${DB_INPUT_PATH_NAME}.idx"

Expand Down
3 changes: 1 addition & 2 deletions modules/nf-core/mmseqs/createtsv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ process MMSEQS_CREATETSV {
\$DB_RESULT_PATH_NAME \\
${prefix}.tsv \\
$args \\
--threads ${task.cpus} \\
--compressed 1
--threads ${task.cpus}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
1 change: 0 additions & 1 deletion modules/nf-core/mmseqs/databases/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ process MMSEQS_DATABASES {
${prefix}/database \\
tmp/ \\
--threads ${task.cpus} \\
--compressed 1 \\
${args}
cat <<-END_VERSIONS > versions.yml
Expand Down
25 changes: 13 additions & 12 deletions modules/nf-core/mmseqs/easysearch/main.nf
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@

process MMSEQS_EASYSEARCH {
tag "$meta.id"
tag "${meta.id}"
label 'process_high'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0':
'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0'
: 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0'}"

input:
tuple val(meta) , path(fasta)
tuple val(meta), path(fasta)
tuple val(meta2), path(db_target)

output:
tuple val(meta), path("${prefix}.tsv"), emit: tsv
path "versions.yml" , emit: versions
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -27,17 +26,16 @@ process MMSEQS_EASYSEARCH {
mkdir -p ${prefix}

# Extract files with specified args based suffix | remove suffix | isolate longest common substring of files
DB_TARGET_PATH_NAME=\$(find -L "$db_target/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )
DB_TARGET_PATH_NAME=\$(find -L "${db_target}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )

mmseqs \\
easy-search \\
$fasta \\
${fasta} \\
\$DB_TARGET_PATH_NAME \\
${prefix}.tsv \\
tmp1 \\
$args \\
--threads ${task.cpus} \\
--compressed 1
${args} \\
--threads ${task.cpus}


cat <<-END_VERSIONS > versions.yml
Expand All @@ -47,6 +45,9 @@ process MMSEQS_EASYSEARCH {
"""

stub:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: "*.dbtype"
prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.tsv

Expand Down
1 change: 0 additions & 1 deletion modules/nf-core/mmseqs/easysearch/meta.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
name: "mmseqs_easysearch"
description: Searches for the sequences of a fasta file in a database using MMseqs2
keywords:
Expand Down
68 changes: 68 additions & 0 deletions modules/nf-core/mmseqs/easysearch/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// nf-test suite for the MMSEQS_EASYSEARCH process defined in ../main.nf.
// It contains one regular run and one stub run; both compare process.out against the stored snapshot.
nextflow_process {

name "Test Process MMSEQS_EASYSEARCH"
script "../main.nf"
process "MMSEQS_EASYSEARCH"
// Suite-specific process settings are loaded from the config file next to this test.
config './nextflow.config'

tag "modules"
tag "modules_nfcore"
tag "mmseqs"
tag "mmseqs/easysearch"
// Tagged with createdb as well because MMSEQS_CREATEDB is executed in the setup block below.
tag "mmseqs/createdb"

// Runs MMSEQS_CREATEDB on contigs.fasta; its `db` output is used as the target database (input[1]) in the tests.
setup {

run("MMSEQS_CREATEDB") {
script "../../createdb/main.nf"
process {
"""
input[0] = [ [id: 'test_query', single_end: true], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) ]
"""
}
}
}

// Regular run: scaffolds.fasta is the query FASTA, searched against the database built from contigs.fasta in setup.
test("sarscov2 - illumina - contigs - fasta") {

when {
process {
"""
input[0] = [ [id: 'test_scaffolds', single_end: true], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true) ]
input[1] = MMSEQS_CREATEDB.out.db
"""
}
}

then {
// Both checks are evaluated together: the process must succeed and all outputs must match the snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Stub run: same inputs as the regular test, executed with the "-stub" option.
test("sarscov2 - illumina - contigs - fasta - stub") {

options "-stub"

when {
process {
"""
input[0] = [ [id: 'test_scaffolds', single_end: true], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true) ]
input[1] = MMSEQS_CREATEDB.out.db
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
72 changes: 72 additions & 0 deletions modules/nf-core/mmseqs/easysearch/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"sarscov2 - illumina - contigs - fasta": {
"content": [
{
"0": [
[
{
"id": "test_scaffolds",
"single_end": true
},
"test_scaffolds.tsv:md5,98a6e138ad41ed04366808799fd6a4b8"
]
],
"1": [
"versions.yml:md5,6e4d1e0d47fdc34c4ef32ad7b030cf70"
],
"tsv": [
[
{
"id": "test_scaffolds",
"single_end": true
},
"test_scaffolds.tsv:md5,98a6e138ad41ed04366808799fd6a4b8"
]
],
"versions": [
"versions.yml:md5,6e4d1e0d47fdc34c4ef32ad7b030cf70"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
},
"timestamp": "2025-01-15T10:08:31.865068417"
},
"sarscov2 - illumina - contigs - fasta - stub": {
"content": [
{
"0": [
[
{
"id": "test_scaffolds",
"single_end": true
},
"test_scaffolds.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,bef949a977594a12d605c2d30198ab27"
],
"tsv": [
[
{
"id": "test_scaffolds",
"single_end": true
},
"test_scaffolds.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,bef949a977594a12d605c2d30198ab27"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
},
"timestamp": "2025-01-15T10:09:30.132833597"
}
}
5 changes: 5 additions & 0 deletions modules/nf-core/mmseqs/easysearch/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test-only configuration: sets the extra command-line arguments (ext.args) for the MMSEQS_EASYSEARCH process.
process {
withName: MMSEQS_EASYSEARCH {
// NOTE(review): --remove-tmp-files 1 presumably deletes the temporary directory after the search, and
// --search-type 2 presumably selects a translated search for the nucleotide test FASTA files; confirm against the MMseqs2 help.
ext.args = '--remove-tmp-files 1 --search-type 2'
}
}
3 changes: 1 addition & 2 deletions modules/nf-core/mmseqs/linclust/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ process MMSEQS_LINCLUST {
${prefix}/${prefix} \\
tmp1 \\
$args \\
--threads ${task.cpus} \\
--compressed 1
--threads ${task.cpus}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
Loading
Loading