Skip to content

Commit

Permalink
Add module PoolSNP (#6173)
Browse files Browse the repository at this point in the history
* Add meta.yml for poolsnp
Update container image links for poolsnp

* Tests outline: still failing

* Add module for PoolSNP

* Fix trailing whitespaces

* Fix trailing whitespaces

* Bump versions

* Remove parameters with default values

* Specify bad_sites as optional

* Add option for max coverage file input

* Apply suggestions from code review

Co-authored-by: Simon Pearce <[email protected]>

* Add tests for stubs

---------

Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
abhilesh and SPPearce authored Aug 16, 2024
1 parent a9d770c commit ad07a82
Show file tree
Hide file tree
Showing 7 changed files with 404 additions and 0 deletions.
9 changes: 9 additions & 0 deletions modules/nf-core/poolsnp/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the POOLSNP module; the pinned version below matches
# the container tags and the VERSION string in main.nf (all 1.0.1).
name: "poolsnp"
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - "bioconda::poolsnp=1.0.1"
63 changes: 63 additions & 0 deletions modules/nf-core/poolsnp/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// POOLSNP: runs PoolSNP.sh on an mpileup file against a reference genome.
//
// Inputs (positional):
//   1. [ meta,  mpileup ]               - mpileup to call variants from; `meta` drives tag, prefix and outputs
//   2. [ meta2, reference ]             - reference genome
//   3. [ meta3, max_cov, max_cov_file ] - exactly ONE of max_cov / max_cov_file must be set
//
// Outputs:
//   vcf       - [ meta, *.vcf.gz ]
//   max_cov   - [ meta, *cov-*.txt ]   (optional)
//   bad_sites - [ meta, *BS.txt.gz ]   (optional)
//   versions  - versions.yml
process POOLSNP {
    tag "$meta.id"
    label 'process_medium'

    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/poolsnp:1.0.1--py312h7e72e81_0':
        'biocontainers/poolsnp:1.0.1--py312h7e72e81_0' }"

    input:
    tuple val(meta) , path(mpileup)
    tuple val(meta2), path(reference)
    // Named meta3 (not meta) so it cannot shadow the mpileup's meta map,
    // which is the one used for tag, prefix and the emitted tuples.
    tuple val(meta3), val(max_cov), path(max_cov_file)

    output:
    tuple val(meta), path("*.vcf.gz")   , emit: vcf
    tuple val(meta), path("*cov-*.txt") , emit: max_cov  , optional: true
    tuple val(meta), path("*BS.txt.gz") , emit: bad_sites, optional: true
    path "versions.yml"                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args    = task.ext.args ?: ''
    def prefix  = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // Exactly one of the two coverage options may be supplied (logical XOR).
    assert (!max_cov && max_cov_file) || (max_cov && !max_cov_file) : "POOLSNP: provide exactly one of 'max_cov' or 'max_cov_file', not both and not neither"

    // max-cov takes either the literal max_cov value or the absolute path of the staged file.
    def max_cov_arg = max_cov ? "${max_cov}" : "\$PWD/${max_cov_file}"

    """
    PoolSNP.sh \\
        mpileup=\$PWD/${mpileup} \\
        output=\$PWD/${prefix} \\
        names=${prefix} \\
        reference=\$PWD/${reference} \\
        jobs=${task.cpus} \\
        max-cov=${max_cov_arg} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    ${task.process}:
        poolsnp: "${VERSION}"
    END_VERSIONS
    """

    stub:
    def prefix  = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // Minimal header-only VCF plus placeholder side files; the coverage file
    // is only created when a max_cov value was supplied.
    """
    echo "##fileformat=VCFv4.2" > ${prefix}.vcf
    echo "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO" >> ${prefix}.vcf
    gzip ${prefix}.vcf
    ${max_cov ? "touch ${prefix}_cov-${max_cov}.txt" : ""}
    echo "" | gzip > ${prefix}_BS.txt.gz

    cat <<-END_VERSIONS > versions.yml
    ${task.process}:
        poolsnp: "${VERSION}"
    END_VERSIONS
    """
}
98 changes: 98 additions & 0 deletions modules/nf-core/poolsnp/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for POOLSNP: documents the inputs and outputs declared in main.nf.
name: "poolsnp"
description: PoolSNP is a heuristic SNP caller, which uses an MPILEUP file and a reference genome in FASTA format as inputs.
keywords:
  - poolseq
  - mpileup
  - variant-calling
tools:
  - "poolsnp":
      description: "PoolSNP is a heuristic SNP caller, which uses an MPILEUP file and a reference genome in FASTA format as inputs."
      homepage: "https://github.com/capoony/PoolSNP"
      documentation: "https://github.com/capoony/PoolSNP/blob/master/README.md"
      licence: ["Apache-2.0"]
      args_id: "$args"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information.
        e.g. `[ id:'sample1', single_end:false ]`
  - mpileup:
      type: file
      description: |
        MPILEUP file. This file contains the base calls and alignment information
        for each position in the reference genome.
        It is used as input for variant calling and other downstream analyses.
      pattern: "*.mpileup"

  - meta2:
      type: map
      description: |
        Groovy Map containing reference genome information.
        e.g. `[ id:'genome' ]`
  - reference:
      type: file
      description: |
        Reference genome in FASTA format.
        May NOT contain any special characters such as "/|,:"
      pattern: "*.{fasta,fa}"

  - max_cov:
      type: float
      description: |
        Maximum coverage is calculated for every library and chromosomal arm
        as the percentile of a coverage distribution,
        e.g. max-cov=0.98 will only consider positions within the 98% coverage percentile
        for a given sample and chromosomal arm.
        Note: Provide `max_cov` or `max_cov_file` but not both.
        Read more: https://github.com/capoony/PoolSNP
  - max_cov_file:
      type: file
      description: |
        File containing the maximum coverage thresholds for all chromosomal arms and libraries.
        This file needs to be tab-delimited with two columns:
        1. Chromosomal name
        2. Comma-separated list of coverage thresholds for each sample in the mpileup file.
        e.g. `2L 100,100,100,200,200` would mean a threshold of 100 for the first three samples
        and 200 for the last two samples on chromosomal arm 2L.
        Note: Provide `max_cov` or `max_cov_file` but not both.
        Read more: https://github.com/capoony/PoolSNP
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - vcf:
      type: file
      description: Gzipped VCF file containing allele counts and frequencies for every position and library
      pattern: "*.vcf.gz"

  # Declared `optional: true` in main.nf, so it is flagged optional here as well.
  - max_cov:
      type: file
      description: File containing the maximum coverage thresholds for all chromosomal arms and libraries
      pattern: "*cov-*.txt"
      optional: true

  - bad_sites:
      type: file
      description: File containing a list of sites (variable and invariable) that did not pass the SNP calling criteria
      pattern: "*BS.txt.gz"
      optional: true

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@abhilesh"
maintainers:
  - "@abhilesh"
156 changes: 156 additions & 0 deletions modules/nf-core/poolsnp/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// nf-test suite for the POOLSNP process.
// Each test feeds the mpileup produced by SAMTOOLS_MPILEUP (see setup) plus
// the sarscov2 genome FASTA, and selects the coverage mode through input[2]:
//   - a max_cov value (0.7) with an empty file slot, or
//   - an empty max_cov with a generated max_cov.tsv file.
nextflow_process {

    name "Test Process POOLSNP"
    script "../main.nf"
    process "POOLSNP"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "poolsnp"
    tag "samtools/mpileup"

    // Generate the mpileup input once from the sarscov2 paired-end BAM.
    setup {
        run("SAMTOOLS_MPILEUP") {
            script "../../samtools/mpileup/main.nf"
            process {
                """
                input[0] = Channel.of([
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
                ])
                input[1] = Channel.of([ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ])
                """
            }
        }
    }

    // Coverage given as a value; the max_cov output is part of the snapshot.
    test("sarscov2 - mpileup - max_cov") {

        when {
            process {
                """
                input[0] = SAMTOOLS_MPILEUP.out.mpileup
                input[1] = Channel.of([
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ])
                input[2] = [
                    [ id:'test' ], // meta map
                    0.7,
                    []
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                {
                    assert snapshot(
                        process.out.versions,
                        path(process.out.vcf[0][1]).vcf.variantsMD5,
                        process.out.max_cov
                    ).match()
                }
            )
        }
    }

    // Coverage given as a file: a single-line TSV is built with collectFile.
    test("sarscov2 - mpileup - max_cov_file") {

        when {
            process {
                """
                input[0] = SAMTOOLS_MPILEUP.out.mpileup
                input[1] = Channel.of([
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ])
                input[2] = Channel.of([
                    [ id:'test' ], // meta map
                    '',
                ]).combine( Channel.of("MT192765.1\t98").collectFile( name:'max_cov.tsv', newLine: true ))
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                {
                    assert snapshot(
                        process.out.versions,
                        path(process.out.vcf[0][1]).vcf.variantsMD5
                    ).match()
                }
            )
        }
    }

    // Stub run of the max_cov value case.
    test("sarscov2 - mpileup - max_cov - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = SAMTOOLS_MPILEUP.out.mpileup
                input[1] = Channel.of([
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ])
                input[2] = [
                    [ id:'test' ], // meta map
                    0.7,
                    []
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                {
                    assert snapshot(
                        process.out.versions,
                        path(process.out.vcf[0][1]).vcf.variantsMD5,
                        process.out.max_cov
                    ).match()
                }
            )
        }
    }

    // Stub run of the max_cov_file case.
    test("sarscov2 - mpileup - max_cov_file - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = SAMTOOLS_MPILEUP.out.mpileup
                input[1] = Channel.of([
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                ])
                input[2] = Channel.of([
                    [ id:'test' ], // meta map
                    '',
                ]).combine( Channel.of("MT192765.1\t98").collectFile( name:'max_cov.tsv', newLine: true ))
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                {
                    assert snapshot(
                        process.out.versions,
                        path(process.out.vcf[0][1]).vcf.variantsMD5
                    ).match()
                }
            )
        }
    }
}
Loading

0 comments on commit ad07a82

Please sign in to comment.