Skip to content

Commit

Permalink
new module : jvarkit/vcffilterjdk (#6621)
Browse files Browse the repository at this point in the history
* vcffilterjdk

* update params

* update params

* oops: fix tag and TODO

* target/region

* answers to review

* f...g space

* fix conda problem https://nfcore.slack.com/archives/CJRH30T6V/p1726233311260959

* add test+bed

* reset polyx

* prevent test exception md5sum for empty file

* update main.nf.test

* update meta.yml

* remove suggestion

---------

Co-authored-by: James A. Fellows Yates <[email protected]>
  • Loading branch information
lindenb and jfy133 authored Sep 20, 2024
1 parent 039499d commit 24b3817
Show file tree
Hide file tree
Showing 7 changed files with 385 additions and 0 deletions.
8 changes: 8 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the jvarkit/vcffilterjdk module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Both tools are pinned with the "channel::package=version" syntax.
  # A single colon is not the channel separator, so the previous
  # "bioconda:bcftools" entry did not pin bcftools to the bioconda channel.
  - "bioconda::jvarkit=2024.08.25"
  - "bioconda::bcftools=1.20"
88 changes: 88 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Filters a VCF/BCF with `jvarkit vcffilterjdk`, wrapped between two
// `bcftools view` calls:
//   1. bcftools view (ext.args1): decodes the input to plain VCF and optionally
//      restricts it to the regions/targets file;
//   2. jvarkit vcffilterjdk (ext.args2): applies the user script/expression;
//   3. bcftools view (ext.args3): writes the final file; the output extension is
//      derived from the output-type option found in ext.args3.
process JVARKIT_VCFFILTERJDK {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_1':
        'biocontainers/jvarkit:2024.08.25--hdfd78af_1' }"

    input:
    // vcf: input variants; tbi: optional index; regions_file: optional regions/targets file
    tuple val(meta), path(vcf), path(tbi), path(regions_file)
    // NOTE(review): fasta, fai and dict are staged in the work directory but are
    // not referenced by the command below - confirm whether they are still needed.
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    tuple val(meta4), path(dict)
    // code: optional file passed to vcffilterjdk with --script
    tuple val(meta5), path(code)
    // pedigree: optional file passed to vcffilterjdk with --pedigree
    tuple val(meta6), path(pedigree)

    output:
    tuple val(meta), path("*.${extension}"), emit: vcf
    tuple val(meta), path("*.tbi")         , emit: tbi, optional: true
    tuple val(meta), path("*.csi")         , emit: csi, optional: true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args1         = task.ext.args1 ?: ''
    def args2         = task.ext.args2 ?: ''
    def args3         = task.ext.args3 ?: ''
    def prefix        = task.ext.prefix ?: "${meta.id}"
    def script_file   = code ? "--script \"${code}\"" : ""
    def pedigree_file = pedigree ? " --pedigree \"${pedigree}\" " : ""
    // With an index bcftools can seek (--regions-file); without one the records
    // have to be streamed and filtered (--targets-file).
    def regions_cmd   = regions_file ? (tbi ? " --regions-file" : " --targets-file") + " \"${regions_file}\" " : ""

    // Only set the JVM heap when the executor reports a memory allocation.
    // Megabytes are used so that allocations below 1 GB do not yield "-Xmx0g".
    def jvm_heap = ""
    if (!task.memory) {
        log.info '[jvarkit vcffilterjdk] Available memory not known - using the JVM default heap size. Specify process memory requirements to change this.'
    } else {
        jvm_heap = "-Xmx${task.memory.mega}m"
    }

    // Deliberately not declared with `def`: the output block above refers to it.
    extension = getVcfExtension(args3); /* custom function, see below */

    if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
    """
    mkdir -p TMP
    bcftools view \\
        -O v \\
        ${regions_cmd} \\
        ${args1} \\
        "${vcf}" |\\
    jvarkit ${jvm_heap} -XX:-UsePerfData -Djava.io.tmpdir=TMP vcffilterjdk \\
        ${pedigree_file} \\
        ${script_file} \\
        ${args2} |\\
    bcftools view \\
        --output "${prefix}.${extension}" \\
        ${args3}
    rm -rf TMP
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
        jvarkit: \$(jvarkit -v)
    END_VERSIONS
    """

    stub:
    def args3  = task.ext.args3 ?: ''
    extension  = getVcfExtension(args3); /* custom function, see below */
    def prefix = task.ext.prefix ?: "${meta.id}"
    // A compressed output must be a valid gzip stream, not a zero-byte file.
    def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch"
    """
    ${create_cmd} "${prefix}.${extension}"
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
        jvarkit: \$(jvarkit -v)
    END_VERSIONS
    """
}



// Custom function: derive the output file extension from the bcftools
// output-type option found in an argument string.
//
// Recognised spellings for a type letter X: "-OX", "-O X", "--output-type X"
// and "--output-type=X". When several types are present the precedence is
// b, then u, then z; anything else (including "v", an empty or a null string)
// maps to "vcf".
//
// @param args  extra bcftools arguments (may be null or empty)
// @return      "bcf.gz" (b), "bcf" (u), "vcf.gz" (z) or "vcf" (v / default)
String getVcfExtension(String args) {
    String opts = args ?: ""
    // True when the output-type option selects the given type letter.
    def requestsType = { String code ->
        (opts =~ "(?:^|\\s)(?:--output-type(?:\\s+|=)|-O\\s*)${code}").find()
    }
    if (requestsType("b")) return "bcf.gz"
    if (requestsType("u")) return "bcf"
    if (requestsType("z")) return "vcf.gz"
    return "vcf"
}
118 changes: 118 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "jvarkit_vcffilterjdk"
description: Filtering VCF with dynamically-compiled java expressions
keywords:
- vcf
- bcf
- filter
- variant
- java
- script
tools:
- "jvarkit":
description: "Java utilities for Bioinformatics."
homepage: "https://github.com/lindenb/jvarkit"
documentation: "https://jvarkit.readthedocs.io/"
tool_dev_url: "https://github.com/lindenb/jvarkit"
doi: "10.1093/bioinformatics/btx734"
licence: ["MIT License"]
args_id: "$args2"

- "bcftools":
description: |
View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF
homepage: "http://samtools.github.io/bcftools/bcftools.html"
documentation: "http://www.htslib.org/doc/bcftools.html"
doi: "10.1093/bioinformatics/btp352"
licence: ["MIT"]
args_id: ["$args1", "$args3"]
input:
- meta:
type: map
description: |
Groovy Map containing VCF information
e.g. [ id:'test_reference' ]
- vcf:
type: file
description: Input VCF/BCF file
pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}"
- tbi:
type: file
description: Optional VCF/BCF index file
pattern: "*.{tbi,csi}"
- regions_file:
type: file
description: Optional. Restrict to regions listed in a file
pattern: "*.{bed,bed.gz,txt,tsv}"
- meta2:
type: map
description: |
Groovy Map containing fasta information
e.g. [ id:'test_reference' ]
- fasta:
type: file
description: Fasta reference file
pattern: "*.fasta"
- meta3:
type: map
description: |
Groovy Map containing fasta.fai information
e.g. [ id:'test_reference' ]
- fai:
type: file
description: Fasta file index
pattern: "*.fasta.fai"
- meta4:
type: map
description: |
Groovy Map containing fasta.dict information
e.g. [ id:'test_reference' ]
- dict:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- meta5:
type: map
description: |
Groovy Map containing code information
e.g. [ id:'test_reference' ]
- code:
type: file
description: File containing custom user code. May be empty if the script is provided via `task.ext.args2`.
pattern: "*.{code,script,txt,tsv,java,js}"
- meta6:
type: map
description: |
Groovy Map containing pedigree information
e.g. [ id:'test_reference' ]
- pedigree:
type: file
description: Optional jvarkit pedigree.
pattern: "*.{tsv,ped,pedigree}"
output:
- meta:
type: map
description: |
Groovy Map containing VCF information
e.g. [ id:'test', single_end:false ]
- vcf:
type: file
description: VCF filtered output file
pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}"
- csi:
type: file
description: Default VCF file index
pattern: "*.csi"
- tbi:
type: file
description: Alternative VCF file index
pattern: "*.tbi"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@lindenb"
maintainers:
- "@lindenb"
119 changes: 119 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// nf-core modules test jvarkit/vcffilterjdk
//
// nf-test specification for JVARKIT_VCFFILTERJDK. Every test feeds the
// sarscov2 test VCF without an index; the filter expression itself comes from
// ./nextflow.config (ext.args2). All test files are located through
// params.modules_testdata_base_path.
nextflow_process {

    name "Test Process JVARKIT_VCFFILTERJDK"
    script "../main.nf"
    process "JVARKIT_VCFFILTERJDK"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "jvarkit"
    tag "jvarkit/vcffilterjdk"

    // Plain run: no index, no regions file, no script file, no pedigree.
    test("sarscov2 - vcf") {

        when {
            process {
                """
                input[0] = [
                    [id:"vcf_test"],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
                    [],
                    []
                ]
                input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
                input[4] = [ [] , []]
                input[5] = [ [] , []]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.vcf[0][1]).vcf.variantsMD5,
                    process.out.versions
                    ).match()
                }
            )
        }

    }

    // Same input plus a BED file; without an index the module passes it as a
    // targets file. Only the existence of the output is checked, not its content.
    test("sarscov2 - vcf+bed") {

        when {
            process {
                """
                input[0] = [
                    [id:"vcf_test"],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
                    [],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
                ]
                input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
                input[4] = [ [] , []]
                input[5] = [ [] , []]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert file(process.out.vcf[0][1]).exists() },
                { assert snapshot(process.out.versions).match() }
            )
        }

    }

    // Stub run: only checks the files created by the stub block.
    test("sarscov2 - vcf - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [id:"vcf_test"],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
                    [],
                    []
                ]
                input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
                input[4] = [ [] , []]
                input[5] = [ [] , []]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.vcf[0][1]),
                    process.out.versions
                    ).match()
                }
            )
        }

    }

}
45 changes: 45 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"sarscov2 - vcf": {
"content": [
"335cdc0f8c403378e1e9d75c41c3736f",
[
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
},


"sarscov2 - vcf+bed": {
"content": [
[
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
},


"sarscov2 - vcf - stub": {
"content": [
"vcf_test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e",
[
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
}

}
5 changes: 5 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test-only settings for JVARKIT_VCFFILTERJDK.
process {
    withName: 'JVARKIT_VCFFILTERJDK' {
        // Inline expression for `jvarkit vcffilterjdk`; it evaluates to true
        // for variants whose start position is odd.
        ext.args2 = " --expression 'return variant.getStart()%2==1;' "
    }
}
2 changes: 2 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the module's test tag to the file globs belonging to that module.
jvarkit/vcffilterjdk:
- "modules/nf-core/jvarkit/vcffilterjdk/**"

0 comments on commit 24b3817

Please sign in to comment.