Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added tantan module #6256

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/tantan/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the TANTAN process (referenced from main.nf via
# "${moduleDir}/environment.yml").
name: "tantan"

# Channels are listed in the order they are declared for package resolution.
channels:
  - conda-forge
  - bioconda
  - defaults

# Single pinned dependency; the container tags in main.nf carry the same
# tantan version (50).
dependencies:
  - "bioconda::tantan=50"
65 changes: 65 additions & 0 deletions modules/nf-core/tantan/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
 * TANTAN: run tantan on a sequence file and publish the result on the channel
 * that matches the requested output type.
 *
 * Input:
 *   tuple val(meta), path(fasta)  - sample metadata map and the sequence file.
 *
 * Output (all per-sample channels are optional; one file is written per run):
 *   masked_fasta / tandem_repeats - "${prefix}.fasta.gz"
 *   repeat_probs / repeat_counts  - "${prefix}.tsv"
 *   bed                           - "${prefix}.bed"
 *   versions                      - "versions.yml"
 *
 * The output type is read from the `-f <digit>` option in `task.ext.args`:
 *   -f 1 or -f 2 -> tsv, -f 3 -> bed, anything else (including no -f) -> fasta.
 * FASTA output is gzip-compressed after tantan finishes.
 */
process TANTAN {
    tag "${meta.id}"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/tantan:50--h43eeafb_0':
        'biocontainers/tantan:50--h43eeafb_0' }"

    input:
    tuple val(meta), path(fasta)

    output:
    // NOTE(review): repeat_probs/repeat_counts share one file pattern, as do
    // masked_fasta/tandem_repeats, so both channels of a pair receive the same
    // file. Kept as-is to preserve the module's emitted channel names.
    tuple val(meta), path("${prefix}.fasta.gz"), optional:true, emit: masked_fasta
    tuple val(meta), path("${prefix}.tsv")     , optional:true, emit: repeat_probs
    tuple val(meta), path("${prefix}.tsv")     , optional:true, emit: repeat_counts
    tuple val(meta), path("${prefix}.bed")     , optional:true, emit: bed
    tuple val(meta), path("${prefix}.fasta.gz"), optional:true, emit: tandem_repeats
    path "versions.yml"                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Look for `-f <digit>` (with or without whitespace). find() guards against
    // args that carry no -f option at all, in which case the default type 0 is used.
    def format_matcher = (args =~ /(?:^|\s)-f\s*(\d)/)
    // Convert the captured text to an integer so the numeric comparisons below
    // actually match (a String never equals an Integer literal).
    def format_num = format_matcher.find() ? format_matcher.group(1).toInteger() : 0
    // NOTE(review): -f 4 falls through to "fasta" because the tandem_repeats
    // channel is declared as .fasta.gz - confirm this matches tantan's real output.
    def output_format = format_num == 3 ? "bed" : ( format_num in [1, 2] ? "tsv" : "fasta" )
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    tantan \\
        ${fasta} \\
        ${args} > ${prefix}.${output_format}

    # Only FASTA output is compressed; -n omits name/timestamp so the archive is reproducible.
    if [ -f ${prefix}.fasta ]; then
        gzip -n ${prefix}.fasta
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tantan: \$(tantan --version 2>&1 | sed 's/^.*tantan //')
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    // Same output-type detection as the script block so the stub creates the
    // file name that the real run would create.
    def format_matcher = (args =~ /(?:^|\s)-f\s*(\d)/)
    def format_num = format_matcher.find() ? format_matcher.group(1).toInteger() : 0
    def output_format = format_num == 3 ? "bed" : ( format_num in [1, 2] ? "tsv" : "fasta" )
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.${output_format}

    if [ -f ${prefix}.fasta ]; then
        gzip -n ${prefix}.fasta
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tantan: \$(tantan --version 2>&1 | sed 's/^.*tantan //')
    END_VERSIONS
    """
}
47 changes: 47 additions & 0 deletions modules/nf-core/tantan/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Module metadata: identity, search keywords and the wrapped tool.
name: "tantan"
description: "Identify simple regions / low complexity / tandem repeats in DNA or protein sequences."
keywords:
  - mask
  - repeat
  - tandem
  - complexity

# One entry per tool wrapped by the module; the key is the tool name.
tools:
  - "tantan":
      description: "tantan masks simple regions (low complexity & short-period tandem repeats) in biological sequences."
      homepage: "https://gitlab.com/mcfrith/tantan/-/blob/main/README.rst"
      documentation: "https://gitlab.com/mcfrith/tantan/-/blob/main/README.rst"
      tool_dev_url: "https://gitlab.com/mcfrith/tantan"
      doi: "10.1093/nar/gkq1212"
      licence:
        - "GPL v3-or-later"

# Elements of the process input tuple, in declaration order
# (`tuple val(meta), path(fasta)` in main.nf).
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - fasta:
      type: file
      description: FASTA file containing a nucleotide sequence.
      pattern: "*.{fa,fa.gz,fasta,fasta.gz,fna,fna.gz}"
Comment on lines +23 to +26
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tantan also accepts FASTQ input; maybe you can rename the input channel to fastx and expand the pattern to accept fq, fq.gz, fastq and fastq.gz?


# Output channels of the TANTAN process; entry names correspond to the
# `emit:` names declared in main.nf.
output:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to describe the repeat_probs, repeat_counts, bed, and tandem_repeats channels too.

# Sample metadata map; main.nf pairs it with every per-sample output file.
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- masked_fasta:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you allow FASTQ input then either you need one output channel that will contain FASTA or FASTQ, or you need one separate channel for each format.

# Body of the `masked_fasta` entry; main.nf emits this channel from
# "${prefix}.fasta.gz".
type: file
description: |
FASTA file where all masked regions are replaced
with lowercase letters.
# The next four entries describe output channels that main.nf emits but that
# were previously undocumented. Patterns mirror the `path(...)` declarations
# in the process; the `-f` values name the tantan output type that produces
# each file.
- repeat_probs:
    type: file
    description: |
      Repeat probabilities reported by tantan
      (output type `-f 1`).
    pattern: "*.tsv"
- repeat_counts:
    type: file
    description: |
      Repeat counts reported by tantan
      (output type `-f 2`).
    pattern: "*.tsv"
- bed:
    type: file
    description: |
      Repeat regions in BED format
      (output type `-f 3`).
    pattern: "*.bed"
# NOTE(review): main.nf declares this channel as gzipped FASTA; confirm that
# this matches what tantan actually writes for output type 4.
- tandem_repeats:
    type: file
    description: |
      Tandem repeats reported by tantan
      (output type `-f 4`).
    pattern: "*.fasta.gz"
- versions:
    type: file
    description: File containing software versions
    pattern: "versions.yml"

# GitHub handles of the module's original author(s) and current maintainer(s).
authors:
  - "@CarsonJM"
maintainers:
  - "@CarsonJM"
100 changes: 100 additions & 0 deletions modules/nf-core/tantan/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// nf-test suite for the TANTAN process.
// Each case sets `params.tantan_args` to select an output type; how that
// parameter reaches the process is defined in ./nextflow.config (not shown here).
nextflow_process {

    name "Test Process TANTAN"
    script "../main.nf"
    process "TANTAN"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "tantan"

    // Output type 0: masked FASTA.
    test("sarscov2 - fasta.gz - outfmt fasta") {
        when {
            params {
                tantan_args = '-f 0'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Output type 1: tab-separated output.
    test("sarscov2 - fasta.gz - outfmt tsv") {
        when {
            params {
                tantan_args = '-f 1'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Output type 3: BED output.
    test("sarscov2 - fasta.gz - outfmt bed") {
        when {
            params {
                tantan_args = '-f 3'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Stub run: exercises the `stub:` block of the process instead of the real command.
    test("sarscov2 - fasta.gz - stub") {
        options "-stub"
        when {
            params {
                tantan_args = '-f 0'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
Loading
Loading