diff --git a/modules/nf-core/plasflow/main.nf b/modules/nf-core/plasflow/main.nf
new file mode 100644
index 00000000000..175c38867c3
--- /dev/null
+++ b/modules/nf-core/plasflow/main.nf
@@ -0,0 +1,82 @@
+// Run PlasFlow.py on an assembly (plain or gzipped FASTA) and emit the classification
+// table together with gzipped per-class FASTA files (chromosomes, plasmids, unclassified).
+process PLASFLOW {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "conda-forge::python=3.5 bioconda::plasflow=1.1.0" // conda-forge::tensorflow=1.5.0
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/plasflow:1.1.0--py35_0':
+        'biocontainers/plasflow:1.1.0--py35_0' }"
+
+    input:
+    tuple val(meta), path(assembly)
+
+    output:
+    tuple val(meta), path("*.tsv")                  , emit: tsv
+    tuple val(meta), path("*_chromosomes.fasta.gz") , emit: chromosomes
+    tuple val(meta), path("*_plasmids.fasta.gz")    , emit: plasmids
+    tuple val(meta), path("*_unclassified.fasta.gz"), emit: unclassified
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    // An input whose name ends in .gz is decompressed to <prefix>.fasta before PlasFlow.py runs.
+    // Any <prefix>.tsv_<class>.fasta file found afterwards is renamed to <prefix>_<class>.fasta and gzipped.
+    """
+    if [[ "$assembly" == *.gz ]]; then
+        gunzip -c $assembly > ${prefix}.fasta
+        PlasFlow.py \\
+            $args \\
+            --input ${prefix}.fasta \\
+            --output ${prefix}.tsv
+    else
+        PlasFlow.py \\
+            $args \\
+            --input $assembly \\
+            --output ${prefix}.tsv
+    fi
+
+    if [ -f ${prefix}.tsv_chromosomes.fasta ]; then
+        mv ${prefix}.tsv_chromosomes.fasta ${prefix}_chromosomes.fasta
+        gzip -n ${prefix}_chromosomes.fasta
+    fi
+
+    if [ -f ${prefix}.tsv_plasmids.fasta ]; then
+        mv ${prefix}.tsv_plasmids.fasta ${prefix}_plasmids.fasta
+        gzip -n ${prefix}_plasmids.fasta
+    fi
+
+    if [ -f ${prefix}.tsv_unclassified.fasta ]; then
+        mv ${prefix}.tsv_unclassified.fasta ${prefix}_unclassified.fasta
+        gzip -n ${prefix}_unclassified.fasta
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        PlasFlow: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    // Variables declared with `def` under `script:` are not visible here, so VERSION
+    // must be declared again for the versions.yml heredoc below to resolve.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    touch ${prefix}.tsv
+    touch ${prefix}_chromosomes.fasta.gz
+    touch ${prefix}_plasmids.fasta.gz
+    touch ${prefix}_unclassified.fasta.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        PlasFlow: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/plasflow/meta.yml b/modules/nf-core/plasflow/meta.yml
new file mode 100644
index 00000000000..c2e65604af4
--- /dev/null
+++ b/modules/nf-core/plasflow/meta.yml
@@ -0,0 +1,59 @@
+name: "plasflow"
+description: Uses PlasFlow for prediction of plasmid sequences in metagenomic contigs.
+keywords:
+  - plasmid
+  - chromosomes
+  - metagenomes
+  - contigs
+tools:
+  - "PlasFlow":
+      description: |
+        PlasFlow is a set of scripts used for prediction of plasmid sequences in metagenomic contigs.
+        It relies on the neural network models trained on full genome and plasmid sequences and is able
+        to differentiate between plasmids and chromosomes with accuracy reaching 96%. It outperforms
+        other available solutions for plasmids recovery from metagenomes and incorporates the thresholding
+        which allows for exclusion of uncertain predictions.
+      homepage: https://github.com/smaegol/PlasFlow
+      documentation: https://github.com/smaegol/PlasFlow
+      tool_dev_url: https://github.com/smaegol/PlasFlow
+      doi: 10.1093/nar/gkx1321
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - assembly:
+      type: file
+      description: fasta file
+      pattern: "*.{gz,fasta,fa,fna}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - tsv:
+      type: file
+      description: file containing classified sequences
+      pattern: "*.tsv"
+  - chromosomes:
+      type: file
+      description: Fasta file containing chromosome sequences
+      pattern: "*_chromosomes.fasta.gz"
+  - plasmids:
+      type: file
+      description: Fasta file containing plasmid sequences
+      pattern: "*_plasmids.fasta.gz"
+  - unclassified:
+      type: file
+      description: Fasta file containing unclassified sequences
+      pattern: "*_unclassified.fasta.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@limrp"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index c7d84e3e980..88629ab0ef4 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1346,6 +1346,9 @@ pints/caller:
 pirate:
   - modules/nf-core/pirate/**
   - tests/modules/nf-core/pirate/**
+plasflow:
+  - modules/nf-core/plasflow/**
+  - tests/modules/nf-core/plasflow/**
 plasmidfinder:
   - modules/nf-core/plasmidfinder/**
   - tests/modules/nf-core/plasmidfinder/**
diff --git a/tests/modules/nf-core/plasflow/main.nf b/tests/modules/nf-core/plasflow/main.nf
new file mode 100644
index 00000000000..188592f5446
--- /dev/null
+++ b/tests/modules/nf-core/plasflow/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { PLASFLOW } from '../../../../modules/nf-core/plasflow/main.nf'
+
+workflow test_plasflow {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)
+    ]
+
+    PLASFLOW ( input )
+}
diff --git a/tests/modules/nf-core/plasflow/nextflow.config b/tests/modules/nf-core/plasflow/nextflow.config
new file mode 100644
index 00000000000..50f50a7a357
--- /dev/null
+++ b/tests/modules/nf-core/plasflow/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/nf-core/plasflow/test.yml b/tests/modules/nf-core/plasflow/test.yml
new file mode 100644
index 00000000000..150e4d2f268
--- /dev/null
+++ b/tests/modules/nf-core/plasflow/test.yml
@@ -0,0 +1,22 @@
+- name: plasflow
+  command: nextflow run ./tests/modules/nf-core/plasflow -entry test_plasflow -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/plasflow/nextflow.config
+  tags:
+    - plasflow
+  files:
+    - path: output/plasflow/test.tsv
+      md5sum: a7c0ee75bca40f7ae5000a120d3f2099
+    - path: output/plasflow/test_chromosomes.fasta.gz
+    - path: output/plasflow/test_plasmids.fasta.gz
+    - path: output/plasflow/test_unclassified.fasta.gz
+    - path: output/plasflow/versions.yml
+
+- name: plasflow stub
+  command: nextflow run ./tests/modules/nf-core/plasflow -entry test_plasflow -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/plasflow/nextflow.config -stub
+  tags:
+    - plasflow
+  files:
+    - path: output/plasflow/test.tsv
+    - path: output/plasflow/test_chromosomes.fasta.gz
+    - path: output/plasflow/test_plasmids.fasta.gz
+    - path: output/plasflow/test_unclassified.fasta.gz
+    - path: output/plasflow/versions.yml