-
Notifications
You must be signed in to change notification settings - Fork 695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added tantan module #6256
base: master
Are you sure you want to change the base?
Added tantan module #6256
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the tantan module; pins the tantan release
# that the process container tags also refer to.
name: tantan
# Channels are listed in priority order.
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - bioconda::tantan=50
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// TANTAN: run tantan on a sequence file and capture its standard output.
//
// Input:
//   meta  - Groovy map with sample information; meta.id is used as the default prefix
//   fasta - sequence file passed to tantan as its positional argument
//
// Output (all optional; which one is produced depends on the `-f` value in task.ext.args):
//   masked_fasta / tandem_repeats - ${prefix}.fasta.gz
//   repeat_probs / repeat_counts  - ${prefix}.tsv
//   bed                           - ${prefix}.bed
//   versions                      - versions.yml with the tantan version
//
// The output file extension is derived from the `-f <n>` option in task.ext.args:
//   1 or 2 -> tsv, 3 -> bed, anything else (including no `-f` at all) -> fasta.
process TANTAN {
    tag "${meta.id}"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/tantan:50--h43eeafb_0':
        'biocontainers/tantan:50--h43eeafb_0' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("${prefix}.fasta.gz") , optional:true , emit: masked_fasta
    tuple val(meta), path("${prefix}.tsv")      , optional:true , emit: repeat_probs
    tuple val(meta), path("${prefix}.tsv")      , optional:true , emit: repeat_counts
    tuple val(meta), path("${prefix}.bed")      , optional:true , emit: bed
    tuple val(meta), path("${prefix}.fasta.gz") , optional:true , emit: tandem_repeats
    path "versions.yml"                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Accept both "-f 3" and "-f3"; the option must start a word so that
    // other options merely ending in "-f" are not picked up.
    def format_matcher = (args =~ /(?:^|\s)-f\s*(\d+)/)
    // No `-f` in args falls back to format 0 instead of failing on a missing
    // match. The captured text is converted to an Integer because a String
    // never compares equal to the integer literals used below.
    def format_num = format_matcher.find() ? format_matcher.group(1).toInteger() : 0
    def output_format = format_num in [1, 2] ? 'tsv' : (format_num == 3 ? 'bed' : 'fasta')
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    tantan \\
        ${fasta} \\
        ${args} > ${prefix}.${output_format}

    if [ -f ${prefix}.fasta ]; then
        gzip ${prefix}.fasta
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tantan: \$(tantan --version 2>&1 | sed 's/^.*tantan //')
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    // Same format detection as in `script:` so the stub creates the same
    // file name the real run would.
    def format_matcher = (args =~ /(?:^|\s)-f\s*(\d+)/)
    def format_num = format_matcher.find() ? format_matcher.group(1).toInteger() : 0
    def output_format = format_num in [1, 2] ? 'tsv' : (format_num == 3 ? 'bed' : 'fasta')
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.${output_format}

    if [ -f ${prefix}.fasta ]; then
        gzip ${prefix}.fasta
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tantan: \$(tantan --version 2>&1 | sed 's/^.*tantan //')
    END_VERSIONS
    """
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Metadata for the tantan module: tool references plus a description of
# every input and output channel declared by the TANTAN process.
name: "tantan"
description: "Identify simple regions / low complexity / tandem repeats in DNA or protein sequences."
keywords:
  - mask
  - repeat
  - tandem
  - complexity
tools:
  - "tantan":
      description: "tantan masks simple regions (low complexity & short-period tandem repeats) in biological sequences."
      homepage: "https://gitlab.com/mcfrith/tantan/-/blob/main/README.rst"
      documentation: "https://gitlab.com/mcfrith/tantan/-/blob/main/README.rst"
      tool_dev_url: "https://gitlab.com/mcfrith/tantan"
      doi: "10.1093/nar/gkq1212"
      licence: ['GPL v3-or-later']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - fasta:
      type: file
      description: FASTA file containing a nucleotide sequence.
      pattern: "*.{fa,fa.gz,fasta,fasta.gz,fna,fna.gz}"

output:
  # Review remark from the pull request: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need to describe the
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - masked_fasta:
      # Review remark from the pull request: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you allow FASTQ input then either you need one output channel that will contain FASTA or FASTQ, or you need one separate channel for each format.
      type: file
      description: |
        FASTA file where all masked regions are replaced
        with lowercase letters.
      pattern: "*.fasta.gz"
  # The entries below document the remaining channels emitted by the process.
  - repeat_probs:
      type: file
      description: |
        Tab-separated file of repeat probabilities
        (tantan output type `-f 1`).
      pattern: "*.tsv"
  - repeat_counts:
      type: file
      description: |
        Tab-separated file of repeat counts
        (tantan output type `-f 2`).
      pattern: "*.tsv"
  - bed:
      type: file
      description: |
        BED file of the repeat regions found by tantan
        (tantan output type `-f 3`).
      pattern: "*.bed"
  - tandem_repeats:
      type: file
      description: |
        Tandem repeats reported by tantan
        (tantan output type `-f 4`).
      pattern: "*.fasta.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@CarsonJM"
maintainers:
  - "@CarsonJM"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// nf-test suite for the TANTAN process.
// Every case feeds the same gzipped sarscov2 genome and only varies the
// `tantan_args` parameter, which selects the tantan output type via `-f`.
nextflow_process {

    name "Test Process TANTAN"
    script "../main.nf"
    process "TANTAN"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "tantan"

    // Output type 0
    test("sarscov2 - fasta.gz - outfmt fasta") {

        when {
            params {
                tantan_args = '-f 0'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Output type 1
    test("sarscov2 - fasta.gz - outfmt tsv") {

        when {
            params {
                tantan_args = '-f 1'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Output type 3
    test("sarscov2 - fasta.gz - outfmt bed") {

        when {
            params {
                tantan_args = '-f 3'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Stub run: exercises the `stub:` section of the process instead of tantan itself.
    test("sarscov2 - fasta.gz - stub") {

        options "-stub"

        when {
            params {
                tantan_args = '-f 0'
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tantan also accepts FASTQ input; maybe you can rename the input channel to `fastx` and expand the pattern to accept `fq`, `fq.gz`, `fastq` and `fastq.gz`?