Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new tool: jvarkit/vcfpolyx #6580

Merged
merged 18 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions modules/nf-core/jvarkit/vcfpolyx/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the JVARKIT_VCFPOLYX module.
name: "jvarkit_vcfpolyx"
# conda-forge is listed first so it has the highest priority, as recommended
# by Bioconda; jvarkit itself is still pinned to bioconda explicitly below.
channels:
  - conda-forge
  - bioconda
dependencies:
  - "bioconda::jvarkit=2024.08.25"
70 changes: 70 additions & 0 deletions modules/nf-core/jvarkit/vcfpolyx/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/**
* JVARKIT_VCFPOLYX
* Author: Pierre Lindenbaum PhD
* vcfpolyx is a sub-command of the jvarkit package. It annotates the variants of a VCF file with poly-X repeat information.
*/
lindenb marked this conversation as resolved.
Show resolved Hide resolved
// Annotates a VCF/BCF with jvarkit vcfpolyx.
//
// Pipeline: `bcftools view` (args1) decodes the input to plain VCF,
// `jvarkit vcfpolyx` (args2) annotates it against the reference, and a second
// `bcftools view` (args3) writes the result. The output file extension is
// derived from the output-type option found in args3 (plain VCF by default).
//
// Inputs:
//   meta/vcf    - sample map and the VCF/BCF to annotate; an optional
//                 `meta.vcfpolyx_args` takes precedence over `task.ext.args2`
//   meta2/fasta - reference FASTA passed to `--reference`
//   meta3/fai   - FASTA index (not referenced on the command line; presumably
//                 staged so jvarkit finds it next to the FASTA)
//   meta4/dict  - sequence dictionary (same remark as for fai)
//
// Outputs: the annotated file (`vcf`), optional `tbi`/`csi` indexes and
// `versions.yml`.
process JVARKIT_VCFPOLYX {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_1':
        'biocontainers/jvarkit:2024.08.25--hdfd78af_1' }"

    input:
    tuple val(meta), path(vcf)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    tuple val(meta4), path(dict)

    output:
    tuple val(meta), path("*.${extension}"), emit: vcf
    tuple val(meta), path("*.tbi")         , emit: tbi, optional: true
    tuple val(meta), path("*.csi")         , emit: csi, optional: true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args1  = task.ext.args1 ?: ''
    // Per-sample options in the meta map win over the module configuration.
    def args2  = meta.vcfpolyx_args ?: (task.ext.args2 ?: ' --tag POLYX --max-repeats 10 ')
    def args3  = task.ext.args3 ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Only cap the JVM heap when a memory directive is set; task.memory is
    // null otherwise and dereferencing it would abort the task.
    def jvm_heap = task.memory ? "-Xmx${task.memory.giga}g" : ''

    // Deliberately assigned without `def`: the output block above interpolates
    // `extension`, so it must not be local to this script block.
    extension = args3.contains("--output-type b") || args3.contains("-Ob") ? "bcf.gz" :
                args3.contains("--output-type u") || args3.contains("-Ou") ? "bcf" :
                args3.contains("--output-type z") || args3.contains("-Oz") ? "vcf.gz" :
                args3.contains("--output-type v") || args3.contains("-Ov") ? "vcf" :
                "vcf"

    // The output would otherwise overwrite the staged input file.
    if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
    """
    mkdir -p TMP

    bcftools view -O v ${args1} "${vcf}" |\\
        jvarkit ${jvm_heap} -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx --reference "${fasta}" ${args2} |\\
        bcftools view --output "${prefix}.${extension}" ${args3}

    rm -rf TMP

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
        jvarkit: \$(jvarkit -v)
    END_VERSIONS
    """

    stub:
    def args3  = task.ext.args3 ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The script block is not evaluated in stub mode, so `extension` has to be
    // computed here as well; otherwise the touched file name and the output
    // glob would be built from an unset variable.
    extension = args3.contains("--output-type b") || args3.contains("-Ob") ? "bcf.gz" :
                args3.contains("--output-type u") || args3.contains("-Ou") ? "bcf" :
                args3.contains("--output-type z") || args3.contains("-Oz") ? "vcf.gz" :
                args3.contains("--output-type v") || args3.contains("-Ov") ? "vcf" :
                "vcf"
    """
    touch "${prefix}.${extension}"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
        jvarkit: \$(jvarkit -v)
    END_VERSIONS
    """
}
94 changes: 94 additions & 0 deletions modules/nf-core/jvarkit/vcfpolyx/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for JVARKIT_VCFPOLYX: tools used, input/output channels and maintainers.
name: "jvarkit_vcfpolyx"
description: annotate VCF files for poly repeats
keywords:
  - vcf
  - bcf
  - annotation
  - repeats
tools:
  - "jvarkit":
      description: "Java utilities for Bioinformatics."
      homepage: "https://github.com/lindenb/jvarkit"
      documentation: "https://jvarkit.readthedocs.io/"
      tool_dev_url: "https://github.com/lindenb/jvarkit"
      doi: "10.6084/m9.figshare.1425030"
      licence: ["MIT License"]

  - "bcftools":
      description: |
        View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF
      homepage: "http://samtools.github.io/bcftools/bcftools.html"
      documentation: "http://www.htslib.org/doc/bcftools.html"
      doi: "10.1093/bioinformatics/btp352"
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing VCF information
        e.g. [ id:'test', single_end:false ]
        An optional `vcfpolyx_args` entry takes precedence over `ext.args2`
        and over the default vcfpolyx options.

  - meta2:
      type: map
      description: |
        Groovy Map containing fasta information

  - meta3:
      type: map
      description: |
        Groovy Map containing fasta.fai information

  - meta4:
      type: map
      description: |
        Groovy Map containing fasta.dict information

  - vcf:
      type: file
      description: Input VCF/BCF file to annotate
      pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}"

  - fasta:
      type: file
      description: Reference genome FASTA file
      pattern: "*.fasta"

  - fai:
      type: file
      description: Index (.fai) of the reference genome FASTA file
      pattern: "*.fasta.fai"

  - dict:
      type: file
      description: GATK sequence dictionary of the reference genome FASTA file
      pattern: "*.dict"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing VCF information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: Annotated VCF/BCF output file (format selected through `ext.args3`)
      pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}"
  - csi:
      type: file
      description: Default VCF file index (optional)
      pattern: "*.csi"
  - tbi:
      type: file
      description: Alternative VCF file index (optional)
      pattern: "*.tbi"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@lindenb"
maintainers:
  - "@lindenb"
43 changes: 43 additions & 0 deletions modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// nf-core modules test jvarkit/vcfpolyx
// nf-test suite for JVARKIT_VCFPOLYX: annotates the sarscov2 test VCF against
// the sarscov2 reference and snapshots the variant checksum and versions file.
nextflow_process {

    name "Test Process JVARKIT_VCFPOLYX"
    script "../main.nf"
    process "JVARKIT_VCFPOLYX"

    tag "modules"
    tag "modules_nfcore"
    tag "jvarkit"
    tag "jvarkit/vcfpolyx"

    test("sarscov2 - vcf") {

        when {
            process {
                // Every input is resolved from params.modules_testdata_base_path so the
                // test does not depend on the legacy params.test_data lookup map.
                """
                input[0] = [
                    [ id:"vcf_test" ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
                ]
                input[1] = [ [:], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [:], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [:], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // variantsMD5 hashes the variant records only, so the snapshot does not
                // change when header lines (dates, command lines) differ between runs.
                { assert snapshot(
                    path(process.out.vcf[0][1]).vcf.variantsMD5,
                    process.out.versions
                    ).match()
                }
            )
        }

    }

}
15 changes: 15 additions & 0 deletions modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"sarscov2 - vcf": {
"content": [
"65a03a6057dc74467c2b7b17230e7f14",
[
"versions.yml:md5,b3c351a56da9062295ef90011a9cd48c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/jvarkit/vcfpolyx/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
---
# Test tag -> module paths. NOTE(review): presumably read by CI to decide which
# tests to run when files under these paths change - confirm against the
# repository tooling.
jvarkit/vcfpolyx:
  - 'modules/nf-core/jvarkit/vcfpolyx/**'
Loading