From 24b38175e0a63065ff3a2f851ad7e9eaf3c4412f Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum <33838+lindenb@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:59:39 +0200 Subject: [PATCH] new module : jvarkit/vcffilterjdk (#6621) * vcffilterdjdk * update params * update params * oppsss tag and TODO * target/region * answers to review * f...g space * fix conda problem https://nfcore.slack.com/archives/CJRH30T6V/p1726233311260959 * add test+bed * reset polyx * prevent test exception md5sum for empty file * update main.nf.test * update meta.yml * remove suggestion --------- Co-authored-by: James A. Fellows Yates --- .../jvarkit/vcffilterjdk/environment.yml | 8 ++ modules/nf-core/jvarkit/vcffilterjdk/main.nf | 88 +++++++++++++ modules/nf-core/jvarkit/vcffilterjdk/meta.yml | 118 +++++++++++++++++ .../jvarkit/vcffilterjdk/tests/main.nf.test | 119 ++++++++++++++++++ .../vcffilterjdk/tests/main.nf.test.snap | 45 +++++++ .../vcffilterjdk/tests/nextflow.config | 5 + .../jvarkit/vcffilterjdk/tests/tags.yml | 2 + 7 files changed, 385 insertions(+) create mode 100644 modules/nf-core/jvarkit/vcffilterjdk/environment.yml create mode 100644 modules/nf-core/jvarkit/vcffilterjdk/main.nf create mode 100644 modules/nf-core/jvarkit/vcffilterjdk/meta.yml create mode 100644 modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test create mode 100644 modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test.snap create mode 100644 modules/nf-core/jvarkit/vcffilterjdk/tests/nextflow.config create mode 100644 modules/nf-core/jvarkit/vcffilterjdk/tests/tags.yml diff --git a/modules/nf-core/jvarkit/vcffilterjdk/environment.yml b/modules/nf-core/jvarkit/vcffilterjdk/environment.yml new file mode 100644 index 00000000000..bc395065668 --- /dev/null +++ b/modules/nf-core/jvarkit/vcffilterjdk/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda 
+dependencies: + - "bioconda::jvarkit=2024.08.25" + - "bioconda::bcftools=1.20" diff --git a/modules/nf-core/jvarkit/vcffilterjdk/main.nf b/modules/nf-core/jvarkit/vcffilterjdk/main.nf new file mode 100644 index 00000000000..3b10a1f9cc2 --- /dev/null +++ b/modules/nf-core/jvarkit/vcffilterjdk/main.nf @@ -0,0 +1,88 @@ +process JVARKIT_VCFFILTERJDK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_1': + 'biocontainers/jvarkit:2024.08.25--hdfd78af_1' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(regions_file) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(code) + tuple val(meta6), path(pedigree) + + output: + tuple val(meta), path("*.${extension}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args1 = task.ext.args1 ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def script_file = code ? "--script \"${code}\"" : "" + def pedigree_file = pedigree ? " --pedigree \"${pedigree}\" " : "" + def regions_cmd = regions_file ? (tbi ? " --regions-file" : " --targets-file") + " \"${regions_file}\" " : "" + + extension = getVcfExtension(args3); /* custom function, see below */ + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + mkdir -p TMP + + bcftools view \\ + -O v \\ + ${regions_cmd} \\ + ${args1} \\ + "${vcf}" |\\ + jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcffilterjdk \\ + ${pedigree_file} \\ + ${script_file} \\ + ${args2} |\\ + bcftools view \\ + --output "${prefix}.${extension}" \\ + ${args3} + + rm -rf TMP + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + jvarkit: \$(jvarkit -v) + END_VERSIONS + """ + + stub: + def args3 = task.ext.args3 ?: '' + extension = getVcfExtension(args3); /* custom function, see below */ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.${extension}" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + jvarkit: \$(jvarkit -v) + END_VERSIONS + """ +} + + + +// Custom Function to get VCF extension +String getVcfExtension(String args) { + return args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf"; +} diff --git a/modules/nf-core/jvarkit/vcffilterjdk/meta.yml b/modules/nf-core/jvarkit/vcffilterjdk/meta.yml new file mode 100644 index 00000000000..f4bce92102c --- /dev/null +++ b/modules/nf-core/jvarkit/vcffilterjdk/meta.yml @@ -0,0 +1,118 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "jvarkit_vcffilterjdk" +description: Filtering VCF with dynamically-compiled java expressions +keywords: + - vcf + - bcf + - filter + - variant + - java + - script +tools: + - "jvarkit": + description: "Java utilities for Bioinformatics." 
+ homepage: "https://github.com/lindenb/jvarkit" + documentation: "https://jvarkit.readthedocs.io/" + tool_dev_url: "https://github.com/lindenb/jvarkit" + doi: "10.1093/bioinformatics/btx734" + licence: ["MIT License"] + args_id: "$args2" + + - "bcftools": + description: | + View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF + homepage: "http://samtools.github.io/bcftools/bcftools.html" + documentation: "http://www.htslib.org/doc/bcftools.html" + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + args_id: ["$args1", "$args3"] +input: + - meta: + type: map + description: | + Groovy Map containing VCF information + e.g. [ id:'test_reference' ] + - vcf: + type: file + description: Input VCF/BCF file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - tbi: + type: file + description: Optional VCF/BCF index file + pattern: "*.{tbi,csi}" + - regions_file: + type: file + description: Optional. Restrict to regions listed in a file + pattern: "*.{bed,bed.gz,txt,tsv}" + - meta2: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test_reference' ] + - fasta: + type: file + description: Fasta reference file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing fasta.fai information + e.g. [ id:'test_reference' ] + - fai: + type: file + description: Fasta file index + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing fasta.dict information + e.g. [ id:'test_reference' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - meta5: + type: map + description: | + Groovy Map containing code information + e.g. [ id:'test_reference' ] + - code: + type: file + description: File containing custom user code. May be empty if the script is provided via `task.ext.args2`. + pattern: "*.{code,script,txt,tsv,java,js}" + - meta6: + type: map + description: | + Groovy Map containing pedigree information + e.g. 
[ id:'test_reference' ] + - pedigree: + type: file + description: Optional jvarkit pedigree. + pattern: "*.{tsv,ped,pedigree}" +output: + - meta: + type: map + description: | + Groovy Map containing VCF information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF filtered output file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - csi: + type: file + description: Default VCF file index + pattern: "*.csi" + - tbi: + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lindenb" +maintainers: + - "@lindenb" diff --git a/modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test b/modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test new file mode 100644 index 00000000000..2dc0a762f1f --- /dev/null +++ b/modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test @@ -0,0 +1,119 @@ +// nf-core modules test jvarkit/vcffilterjdk +nextflow_process { + + name "Test Process JVARKIT_VCFFILTERJDK" + script "../main.nf" + process "JVARKIT_VCFFILTERJDK" + config "./nextflow.config" + + + tag "modules" + tag "modules_nfcore" + tag "jvarkit" + tag "jvarkit/vcffilterjdk" + + test("sarscov2 - vcf") { + + when { + process { + """ + input[0] =[ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] + input[4] = [ [] , []] + input[5] = [ [] , []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.vcf[0][1]).vcf.variantsMD5, + process.out.versions + ).match() + } + ) + } + + } + 
+ + + test("sarscov2 - vcf+bed") { + + when { + process { + """ + input[0] =[ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] + input[4] = [ [] , []] + input[5] = [ [] , []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.vcf[0][1]).exists() }, + { assert snapshot(process.out.versions).match() + } + ) + } + } + + + + + test("sarscov2 - vcf - stub") { + + options "-stub" + + when { + process { + """ + input[0] =[ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] + input[4] = [ [] , []] + input[5] = [ [] , []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.vcf[0][1]), + process.out.versions + ).match() + } + ) + } + + } + + +} diff --git a/modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test.snap b/modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test.snap new file mode 100644 index 00000000000..484f303b8f1 --- /dev/null +++ b/modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "sarscov2 - vcf": { + "content": [ + 
"335cdc0f8c403378e1e9d75c41c3736f", + [ + "versions.yml:md5,3601751995727e2ee7102d8ef18e5304" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-03T14:00:13.118369362" + }, + + + "sarscov2 - vcf+bed": { + "content": [ + [ + "versions.yml:md5,3601751995727e2ee7102d8ef18e5304" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-03T14:00:13.118369362" + }, + + + "sarscov2 - vcf - stub": { + "content": [ + "vcf_test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "versions.yml:md5,3601751995727e2ee7102d8ef18e5304" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-03T14:00:13.118369362" + } + +} diff --git a/modules/nf-core/jvarkit/vcffilterjdk/tests/nextflow.config b/modules/nf-core/jvarkit/vcffilterjdk/tests/nextflow.config new file mode 100644 index 00000000000..9c4e8e90690 --- /dev/null +++ b/modules/nf-core/jvarkit/vcffilterjdk/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: JVARKIT_VCFFILTERJDK { + ext.args2=" --expression 'return variant.getStart()%2==1;' " + } +} diff --git a/modules/nf-core/jvarkit/vcffilterjdk/tests/tags.yml b/modules/nf-core/jvarkit/vcffilterjdk/tests/tags.yml new file mode 100644 index 00000000000..da105badc90 --- /dev/null +++ b/modules/nf-core/jvarkit/vcffilterjdk/tests/tags.yml @@ -0,0 +1,2 @@ +jvarkit/vcffilterjdk: + - "modules/nf-core/jvarkit/vcffilterjdk/**"