diff --git a/modules/nf-core/poolsnp/environment.yml b/modules/nf-core/poolsnp/environment.yml
new file mode 100644
index 00000000000..87b56af84e3
--- /dev/null
+++ b/modules/nf-core/poolsnp/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "poolsnp"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::poolsnp=1.0.1"
diff --git a/modules/nf-core/poolsnp/main.nf b/modules/nf-core/poolsnp/main.nf
new file mode 100644
index 00000000000..c292f395627
--- /dev/null
+++ b/modules/nf-core/poolsnp/main.nf
@@ -0,0 +1,68 @@
+// Runs PoolSNP.sh on an mpileup file and a reference FASTA.
+// Emits the gzipped VCF and, when present, the max-coverage and bad-sites files.
+process POOLSNP {
+    tag "$meta.id"
+    label 'process_medium'
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/poolsnp:1.0.1--py312h7e72e81_0':
+        'biocontainers/poolsnp:1.0.1--py312h7e72e81_0' }"
+
+    input:
+    tuple val(meta) , path(mpileup)
+    tuple val(meta2), path(reference)
+    // Named meta3 so it cannot overwrite the sample meta of the mpileup tuple, which tags the task and all outputs.
+    tuple val(meta3), val(max_cov), path(max_cov_file)
+
+    output:
+    tuple val(meta), path("*.vcf.gz")  , emit: vcf
+    tuple val(meta), path("*cov-*.txt"), emit: max_cov  , optional: true
+    tuple val(meta), path("*BS.txt.gz"), emit: bad_sites, optional: true
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    // Exactly one of max_cov (a value) or max_cov_file (a threshold file) must be supplied.
+    assert (!max_cov && max_cov_file) || (max_cov && !max_cov_file) : "POOLSNP: provide exactly one of 'max_cov' or 'max_cov_file'"
+
+    """
+    PoolSNP.sh \\
+        mpileup=\$PWD/${mpileup} \\
+        output=\$PWD/${prefix} \\
+        names=${prefix} \\
+        reference=\$PWD/${reference} \\
+        jobs=${task.cpus} \\
+        max-cov=${max_cov ? "${max_cov}" : "\$PWD/${max_cov_file}"} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    ${task.process}:
+        poolsnp: "${VERSION}"
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    // The coverage file is named <prefix>-cov-<max_cov>.txt to match what the real run produces.
+
+    """
+    echo "##fileformat=VCFv4.2" > ${prefix}.vcf
+    echo "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO" >> ${prefix}.vcf
+    gzip ${prefix}.vcf
+    ${max_cov ? "touch ${prefix}-cov-${max_cov}.txt" : ""}
+    echo "" | gzip > ${prefix}_BS.txt.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    ${task.process}:
+        poolsnp: "${VERSION}"
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/poolsnp/meta.yml b/modules/nf-core/poolsnp/meta.yml
new file mode 100644
index 00000000000..8de67751c65
--- /dev/null
+++ b/modules/nf-core/poolsnp/meta.yml
@@ -0,0 +1,105 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "poolsnp"
+description: PoolSNP is a heuristic SNP caller, which uses an MPILEUP file and a reference genome in FASTA format as inputs.
+keywords:
+  - poolseq
+  - mpileup
+  - variant-calling
+tools:
+  - "poolsnp":
+      description: "PoolSNP is a heuristic SNP caller, which uses an MPILEUP file and a reference genome in FASTA format as inputs."
+      homepage: "https://github.com/capoony/PoolSNP"
+      documentation: "https://github.com/capoony/PoolSNP/blob/master/README.md"
+      licence: ["Apache-2.0"]
+      args_id: "$args"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information.
+        e.g. `[ id:'sample1', single_end:false ]`
+
+  - mpileup:
+      type: file
+      description: |
+        MPILEUP file. This file contains the base calls and alignment information
+        for each position in the reference genome.
+        It is used as input for variant calling and other downstream analyses.
+      pattern: "*.mpileup"
+
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing sample information.
+        e.g. `[ id:'sample1' ]`
+
+  - reference:
+      type: file
+      description: |
+        Reference genome in FASTA format.
+        May NOT contain any special characters such as "/|,:"
+      pattern: "*.{fasta,fa}"
+
+  - meta3:
+      type: map
+      description: |
+        Groovy Map accompanying the maximum coverage settings; not propagated to the outputs.
+        e.g. `[ id:'sample1' ]`
+
+  - max_cov:
+      type: float
+      description: |
+        Maximum coverage is calculated for every library and chromosomal arm
+        as the percentile of a coverage distribution,
+        e.g. max-cov=0.98 will only consider positions within the 98% coverage percentile
+        for a given sample and chromosomal arm.
+        Note: Provide `max_cov` or `max_cov_file` but not both.
+        Read more: https://github.com/capoony/PoolSNP
+
+  - max_cov_file:
+      type: file
+      description: |
+        File containing the maximum coverage thresholds for all chromosomal arms and libraries.
+        This file needs to be tab-delimited with two columns:
+        1. Chromosomal name
+        2. Comma-separated list of coverage thresholds for each sample in the mpileup file.
+        e.g. `2L 100,100,100,200,200` would mean a threshold of 100 for the first three samples
+        and 200 for the last two samples on chromosomal arm 2L.
+        Note: Provide `max_cov` or `max_cov_file` but not both.
+        Read more: https://github.com/capoony/PoolSNP
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+
+  - vcf:
+      type: file
+      description: Gzipped VCF file containing allele counts and frequencies for every position and library
+      pattern: "*.vcf.gz"
+
+  - max_cov:
+      type: file
+      description: File containing the maximum coverage thresholds for all chromosomal arms and libraries
+      pattern: "*cov-*.txt"
+      optional: true
+
+  - bad_sites:
+      type: file
+      description: File containing a list of sites (variable and invariable) that did not pass the SNP calling criteria
+      pattern: "*BS.txt.gz"
+      optional: true
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@abhilesh"
+maintainers:
+  - "@abhilesh"
diff --git a/modules/nf-core/poolsnp/tests/main.nf.test b/modules/nf-core/poolsnp/tests/main.nf.test
new file mode 100644
index 00000000000..2130e272a02
--- /dev/null
+++ b/modules/nf-core/poolsnp/tests/main.nf.test
@@ -0,0 +1,156 @@
+nextflow_process {
+
+    name "Test Process POOLSNP"
+    script "../main.nf"
+    process "POOLSNP"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "poolsnp"
+    tag "samtools/mpileup"
+
+    setup {
+        run("SAMTOOLS_MPILEUP") {
+            script "../../samtools/mpileup/main.nf"
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
+                ])
+                input[1] = Channel.of([ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ])
+                """
+            }
+        }
+    }
+
+    test("sarscov2 - mpileup - max_cov") {
+
+        when {
+            process {
+                """
+                input[0] = SAMTOOLS_MPILEUP.out.mpileup
+                input[1] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = [
+                    [ id:'test' ], // meta map
+                    0.7,
+                    []
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    path(process.out.vcf[0][1]).vcf.variantsMD5,
+                    process.out.max_cov
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("sarscov2 - mpileup - max_cov_file") {
+
+        when {
+            process {
+                """
+                input[0] = SAMTOOLS_MPILEUP.out.mpileup
+                input[1] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = Channel.of([
+                    [ id:'test' ], // meta map
+                    '',
+                ]).combine( Channel.of("MT192765.1\t98").collectFile( name:'max_cov.tsv', newLine: true ))
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    path(process.out.vcf[0][1]).vcf.variantsMD5
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("sarscov2 - mpileup - max_cov - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = SAMTOOLS_MPILEUP.out.mpileup
+                input[1] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = [
+                    [ id:'test' ], // meta map
+                    0.7,
+                    []
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    path(process.out.vcf[0][1]).vcf.variantsMD5,
+                    process.out.max_cov
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("sarscov2 - mpileup - max_cov_file - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = SAMTOOLS_MPILEUP.out.mpileup
+                input[1] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = Channel.of([
+                    [ id:'test' ], // meta map
+                    '',
+                ]).combine( Channel.of("MT192765.1\t98").collectFile( name:'max_cov.tsv', newLine: true ))
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    path(process.out.vcf[0][1]).vcf.variantsMD5
+                    ).match()
+                }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/poolsnp/tests/main.nf.test.snap b/modules/nf-core/poolsnp/tests/main.nf.test.snap
new file mode 100644
index 00000000000..f908d02916c
--- /dev/null
+++ b/modules/nf-core/poolsnp/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+    "sarscov2 - mpileup - max_cov - stub": {
+        "content": [
+            [
+                "versions.yml:md5,2bc174a10a560499c04e6b35ce86fed0"
+            ],
+            "d41d8cd98f00b204e9800998ecf8427e",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test-cov-0.7.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-16T15:51:10.298893"
+    },
+    "sarscov2 - mpileup - max_cov_file - stub": {
+        "content": [
+            [
+                "versions.yml:md5,2bc174a10a560499c04e6b35ce86fed0"
+            ],
+            "d41d8cd98f00b204e9800998ecf8427e"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-16T15:56:12.188459"
+    },
+    "sarscov2 - mpileup - max_cov": {
+        "content": [
+            [
+                "versions.yml:md5,2bc174a10a560499c04e6b35ce86fed0"
+            ],
+            "46e2d27f13d700dbdb061091308f1555",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test-cov-0.7.txt:md5,ccbb119522c09daa976a9015ba999329"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-16T15:30:50.011"
+    },
+    "sarscov2 - mpileup - max_cov_file": {
+        "content": [
+            [
+                "versions.yml:md5,2bc174a10a560499c04e6b35ce86fed0"
+            ],
+            "46e2d27f13d700dbdb061091308f1555"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-16T15:31:24.140061"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/poolsnp/tests/nextflow.config b/modules/nf-core/poolsnp/tests/nextflow.config
new file mode 100644
index 00000000000..5b60f6b0f7c
--- /dev/null
+++ b/modules/nf-core/poolsnp/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+
+    withName: POOLSNP {
+        ext.args = 'min-cov=4 min-count=4 min-freq=0.01 miss-frac=0.5 badsites=1 allsites=1'
+    }
+}
diff --git a/modules/nf-core/poolsnp/tests/tags.yml b/modules/nf-core/poolsnp/tests/tags.yml
new file mode 100644
index 00000000000..e1295412e32
--- /dev/null
+++ b/modules/nf-core/poolsnp/tests/tags.yml
@@ -0,0 +1,2 @@
+poolsnp:
+  - "modules/nf-core/poolsnp/**"