Skip to content

Commit

Permalink
bcftools csq (#6605)
Browse files Browse the repository at this point in the history
* bcftools csq

* fix lint / format

* env.yml

* move fun at bottom

* Update modules/nf-core/bcftools/csq/main.nf

Co-authored-by: Sateesh_Peri <[email protected]>

---------

Co-authored-by: Sateesh_Peri <[email protected]>
  • Loading branch information
lindenb and sateeshperi authored Sep 6, 2024
1 parent 8afa912 commit c3e4418
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 0 deletions.
5 changes: 5 additions & 0 deletions modules/nf-core/bcftools/csq/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for the BCFTOOLS_CSQ module; main.nf points at this file
# through "${moduleDir}/environment.yml".
channels:
# conda-forge is listed ahead of bioconda.
- conda-forge
- bioconda
dependencies:
# Pinned to bcftools 1.20, the same version as the container images in main.nf.
- "bioconda::bcftools=1.20"
81 changes: 81 additions & 0 deletions modules/nf-core/bcftools/csq/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@

// BCFTOOLS_CSQ: runs `bcftools csq` on a VCF/BCF using a reference FASTA and a
// GFF3 annotation, and emits the annotated file, any index that was written,
// and a versions.yml recording the bcftools version.
process BCFTOOLS_CSQ {
tag "$meta.id"
label 'process_single'

conda "${moduleDir}/environment.yml"
// Singularity image is used when the engine is singularity and
// ext.singularity_pull_docker_container is not set; otherwise the docker image.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_1':
'biocontainers/bcftools:1.20--h8b25389_1' }"


input:
// meta  : map describing the sample; meta.id is used for the tag and default prefix
// vcf   : variant file to annotate (last positional argument of the command)
tuple val(meta), path(vcf)
// fasta : reference passed as --fasta-ref
tuple val(meta2), path(fasta)
// fai   : staged next to the FASTA but never named on the command line
//         (presumably bcftools finds it from the FASTA path -- TODO confirm)
tuple val(meta3), path(fai)
// gff3  : annotation passed as --gff-annot
tuple val(meta4), path(gff3)

output:
// `extension` is computed in the script/stub section from ext.args.
tuple val(meta), path("*.${extension}"), emit: vcf
// Index files are optional: they only exist when ext.args requests an index.
tuple val(meta), path("*.tbi") , emit: tbi, optional: true
tuple val(meta), path("*.csi") , emit: csi, optional: true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"

// Assigned without `def`; the output glob above references `extension`.
extension = getVcfExtension(args);

// Refuse to run when the output name would be identical to the staged input name.
if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

"""
bcftools csq \\
--output ${prefix}.${extension} \\
--threads ${task.cpus} \\
--fasta-ref ${fasta} \\
--gff-annot ${gff3} \\
$args \\
$vcf
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
extension = getVcfExtension(args);

// Index type requested through ext.args: an explicit =tbi / =csi wins,
// a bare --write-index / -W falls back to "csi", and no flag gives "".
def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" :
args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" :
args.contains("--write-index") || args.contains("-W") ? "csi" :
""
// For *.gz extensions the placeholder is a gzip stream of an empty line; otherwise an empty file.
def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch"
// A placeholder index is only created next to *.gz outputs.
def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : ""

// Same name-collision guard as in the script section.
if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

"""
${create_cmd} ${prefix}.${extension}
${create_index}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
END_VERSIONS
"""
}
// Custom Functions

/**
 * Derive the output file extension from the bcftools arguments.
 *
 * The output type may be spelled `--output-type X`, `--output-type=X`,
 * `-OX` or `-O X`; all four are recognised. Types are tested in the fixed
 * order b, u, z, v, so the first type found in that order wins when several
 * are present.
 *
 * @param args bcftools command-line arguments (typically task.ext.args); null is treated as empty
 * @return "bcf.gz" (b), "bcf" (u), "vcf.gz" (z) or "vcf" (v, and the default when no type is given)
 */
String getVcfExtension(String args) {
    def text = args ?: ''
    // Insertion order of this map is the precedence order of the lookup below.
    def extensionByType = [b: "bcf.gz", u: "bcf", z: "vcf.gz", v: "vcf"]
    // Option prefix: long form followed by '=' or whitespace, or short form with optional whitespace.
    def optionPrefix = /(?:--output-type(?:=|\s+)|-O\s*)/
    def hit = extensionByType.find { type, ext -> (text =~ (optionPrefix + type)).find() }
    return hit ? hit.value : "vcf"
}
81 changes: 81 additions & 0 deletions modules/nf-core/bcftools/csq/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
name: bcftools_csq
description: bcftools Haplotype-aware consequence caller
keywords:
  - annotation
  - gff
  - gff3
  - protein
  - functional
  - vcf
  - bcf
  - bcftools
tools:
  # Keyed by the bcftools subcommand this module wraps.
  - csq:
      description: |
        Haplotype-aware consequence caller
      homepage: http://samtools.github.io/bcftools/bcftools.html
      documentation: http://samtools.github.io/bcftools/bcftools.html#csq
      doi: 10.1093/gigascience/giab008
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: VCF/BCF file
      pattern: "*.{vcf.gz,vcf,bcf}"
  - meta2:
      type: map
      description: |
        Groovy Map containing fasta information
  - fasta:
      type: file
      description: Fasta reference
      pattern: "*.{fasta,fa}"
  - meta3:
      type: map
      description: |
        Groovy Map containing fai information
  - fai:
      type: file
      description: Fasta index
      pattern: "*.{fai}"
  - meta4:
      type: map
      description: |
        Groovy Map containing gff3 information
  - gff3:
      type: file
      description: GFF3 file
      pattern: "*.{gff,gff.gz,gff3,gff3.gz}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      # The extension follows the output type given in ext.args; with no
      # output type the module names the file "<prefix>.vcf".
      description: |
        VCF/BCF with consequence annotation, uncompressed VCF by default;
        set the output type in ext.args to get vcf.gz, bcf or bcf.gz
      pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
  - tbi:
      type: file
      description: Alternative VCF file index
      pattern: "*.tbi"
  - csi:
      type: file
      description: Default VCF file index
      pattern: "*.csi"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@lindenb"

maintainers:
  - "@lindenb"
51 changes: 51 additions & 0 deletions modules/nf-core/bcftools/csq/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// nf-test suite for the BCFTOOLS_CSQ process defined in ../main.nf.
nextflow_process {

name "Test Process BCFTOOLS_CSQ"
script "../main.nf"
process "BCFTOOLS_CSQ"
tag "modules"
tag "modules_nfcore"
tag "bcftools"
tag "bcftools/csq"

// Single case: annotate the chr21 dbSNP test VCF against the chr21 FASTA and the genome GFF3.
test("homo_sapiens") {

// ext.args and ext.prefix for this run are supplied by ./vcf.config.
config "./vcf.config"
when {

// input[0..3] map, in order, onto the process inputs: vcf, fasta, fai, gff3.
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', checkIfExists: true)
]
input[1] = [
[ : ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
]
input[2] = [
[ : ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
]
input[3] = [
[ : ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gff3', checkIfExists: true)
]
"""
}
}

then {
// The snapshot holds the variantsMD5 of the emitted VCF plus the versions channel,
// rather than the VCF file itself.
assertAll(
{ assert process.success },
{ assert snapshot(
path(process.out.vcf[0][1]).vcf.variantsMD5,
process.out.versions
).match()
}
)
}

}

}
15 changes: 15 additions & 0 deletions modules/nf-core/bcftools/csq/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"homo_sapiens": {
"content": [
"807319f441f639c33708781757da53ee",
[
"versions.yml:md5,13a1a38a47f60c47fd15a32552492bbf"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-06T16:02:00.380988135"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/bcftools/csq/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the "bcftools/csq" tag to every path under the module directory.
bcftools/csq:
- "modules/nf-core/bcftools/csq/**"
4 changes: 4 additions & 0 deletions modules/nf-core/bcftools/csq/tests/vcf.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Test configuration for BCFTOOLS_CSQ, loaded by tests/main.nf.test.
process {
// Extra bcftools csq arguments; "-Ov" makes main.nf's getVcfExtension pick the "vcf" extension.
ext.args = { "--local-csq --ncsq 20 --unify-chr-names 1 -Ov " }
// Output is written as "tested.<extension>" instead of using meta.id.
ext.prefix = "tested"
}

0 comments on commit c3e4418

Please sign in to comment.