From 9c708181e0c60fa9bc5ebd0e77fc5fd6033cdd92 Mon Sep 17 00:00:00 2001 From: lindenb Date: Tue, 3 Sep 2024 12:25:22 +0200 Subject: [PATCH 01/16] vcfpolyx, 1st commit --- .../nf-core/jvarkit/vcfpolyx/environment.yml | 9 ++ modules/nf-core/jvarkit/vcfpolyx/main.nf | 89 +++++++++++++++++++ modules/nf-core/jvarkit/vcfpolyx/meta.yml | 45 ++++++++++ .../jvarkit/vcfpolyx/tests/main.nf.test | 68 ++++++++++++++ .../nf-core/jvarkit/vcfpolyx/tests/tags.yml | 2 + 5 files changed, 213 insertions(+) create mode 100644 modules/nf-core/jvarkit/vcfpolyx/environment.yml create mode 100644 modules/nf-core/jvarkit/vcfpolyx/main.nf create mode 100644 modules/nf-core/jvarkit/vcfpolyx/meta.yml create mode 100644 modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test create mode 100644 modules/nf-core/jvarkit/vcfpolyx/tests/tags.yml diff --git a/modules/nf-core/jvarkit/vcfpolyx/environment.yml b/modules/nf-core/jvarkit/vcfpolyx/environment.yml new file mode 100644 index 00000000000..980e48236e6 --- /dev/null +++ b/modules/nf-core/jvarkit/vcfpolyx/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "jvarkit_vcfpolyx" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::jvarkit=2024.08.25" diff --git a/modules/nf-core/jvarkit/vcfpolyx/main.nf b/modules/nf-core/jvarkit/vcfpolyx/main.nf new file mode 100644 index 00000000000..5a220cf31b5 --- /dev/null +++ b/modules/nf-core/jvarkit/vcfpolyx/main.nf @@ -0,0 +1,89 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process JVARKIT_VCFPOLYX { + tag '$bam' + label 'process_single' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_0': + 'biocontainers/jvarkit:2024.08.25--hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + path bam + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + path "*.bam", emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + jvarkit: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + jvarkit: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml new file mode 100644 index 00000000000..a2b1b3ead7f --- /dev/null +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -0,0 +1,45 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "jvarkit_vcfpolyx" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort + - example + - genomics +tools: + - "jvarkit": + ## TODO nf-core: Add a description and other details for the software below + description: "Java utilities for Bioinformatics." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: ['MIT License'] + +## TODO nf-core: Add a description of all of the variables used as input +input: + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@lindenb" +maintainers: + - "@lindenb" diff --git a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test new file mode 100644 index 00000000000..93952cba65d --- /dev/null +++ b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test @@ -0,0 +1,68 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test jvarkit/vcfpolyx +nextflow_process { + + name "Test Process JVARKIT_VCFPOLYX" + script "../main.nf" + process "JVARKIT_VCFPOLYX" + + tag "modules" + tag "modules_nfcore" + tag "jvarkit" + tag "jvarkit/vcfpolyx" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + ) + } + + } + +} diff --git a/modules/nf-core/jvarkit/vcfpolyx/tests/tags.yml b/modules/nf-core/jvarkit/vcfpolyx/tests/tags.yml new file mode 100644 index 00000000000..475476a589b --- /dev/null +++ b/modules/nf-core/jvarkit/vcfpolyx/tests/tags.yml @@ -0,0 +1,2 @@ +jvarkit/vcfpolyx: + - "modules/nf-core/jvarkit/vcfpolyx/**" From a26daa1afadeb9818500a9c66f9c16c77a6abda2 Mon Sep 17 00:00:00 2001 From: lindenb Date: Tue, 3 Sep 2024 17:45:32 +0200 Subject: [PATCH 02/16] vcfpolyx --- modules/nf-core/jvarkit/vcfpolyx/main.nf | 95 ++++++++----------- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 16 +++- .../jvarkit/vcfpolyx/tests/main.nf.test | 45 ++------- .../jvarkit/vcfpolyx/tests/main.nf.test.snap | 37 ++++++++ 4 files changed, 96 insertions(+), 97 deletions(-) create mode 100644 modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap diff --git a/modules/nf-core/jvarkit/vcfpolyx/main.nf b/modules/nf-core/jvarkit/vcfpolyx/main.nf index 5a220cf31b5..6220fe9b163 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/main.nf +++ b/modules/nf-core/jvarkit/vcfpolyx/main.nf @@ -1,89 +1,68 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process JVARKIT_VCFPOLYX { - tag '$bam' + tag "$meta.id" label 'process_single' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_0': 'biocontainers/jvarkit:2024.08.25--hdfd78af_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - path bam + tuple val(meta), path(vcf) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - path "*.bam", emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + tuple val(meta), path("*.${extension}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + def args1 = task.ext.args1 ?: '' + def args2 = task.ext.args2 ?: ' --tag POLYX --max-repeats 10 ' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + extension = args3.contains("--output-type b") || args3.contains("-Ob") ? "bcf.gz" : + args3.contains("--output-type u") || args3.contains("-Ou") ? "bcf" : + args3.contains("--output-type z") || args3.contains("-Oz") ? "vcf.gz" : + args3.contains("--output-type v") || args3.contains("-Ov") ? "vcf" : + "vcf" + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - $bam + set -o pipefail + mkdir -p TMP + + which bcftools || true && echo OK + + bcftools view -O v ${args1} "${vcf}" |\\ + jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx --reference "${fasta}" ${args2} |\\ + bcftools view --output "${prefix}.${extension}" ${args3} + + rm -rf TMP cat <<-END_VERSIONS > versions.yml "${task.process}": - jvarkit: \$(samtools --version |& sed '1!d ; s/samtools //') + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + jvarkit: \$(jvarkit -v) END_VERSIONS """ stub: - def args = task.ext.args ?: '' - - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.bam + touch "${prefix}.${extension}" cat <<-END_VERSIONS > versions.yml "${task.process}": - jvarkit: \$(samtools --version |& sed '1!d ; s/samtools //') + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + jvarkit: \$(jvarkit -v) END_VERSIONS """ } diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index a2b1b3ead7f..36e92ed184c 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -19,7 +19,13 @@ tools: ## TODO nf-core: Add a description of all of the variables used as input input: - # + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + ## TODO nf-core: Delete / customise this example input - bam: type: file @@ -28,7 +34,13 @@ input: ## TODO nf-core: Add a description of all of the variables used as output output: - # + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: type: file description: File containing software versions diff --git a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test index 93952cba65d..b98eba3f93e 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test +++ b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test @@ -1,4 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: // nf-core modules test jvarkit/vcfpolyx nextflow_process { @@ -11,20 +10,18 @@ nextflow_process { tag "jvarkit" tag "jvarkit/vcfpolyx" - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used - test("sarscov2 - bam") { - - // TODO nf-core: If you are created a test for a chained module - // (the module requires running more than one process to generate the required output) - // add the 'setup' method here. - // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + test("sarscov2 - vcf") { when { process { """ - // TODO nf-core: define inputs of the process here. Example: - - input[0] = file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) + input[0] =[ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + input[1] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] """ } } @@ -33,36 +30,10 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out).match() } - //TODO nf-core: Add all required assertions to verify the test output. - // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. ) } } - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. - test("sarscov2 - bam - stub") { - - options "-stub" - - when { - process { - """ - // TODO nf-core: define inputs of the process here. Example: - - input[0] = file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - //TODO nf-core: Add all required assertions to verify the test output. - ) - } - - } } diff --git a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap new file mode 100644 index 00000000000..71d0668813c --- /dev/null +++ b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "sarscov2 - vcf": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-03T14:00:13.118369362" + } +} \ No newline at end of file From 41ba2fc2fa159279c0c8593045c174dc76c34a99 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 12:02:39 +0200 Subject: [PATCH 03/16] vcfployx, tests ok --- .../nf-core/jvarkit/vcfpolyx/environment.yml | 2 +- modules/nf-core/jvarkit/vcfpolyx/main.nf | 13 +-- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 80 ++++++++++++------- .../jvarkit/vcfpolyx/tests/main.nf.test | 6 +- .../jvarkit/vcfpolyx/tests/main.nf.test.snap | 32 ++------ 5 files changed, 68 insertions(+), 65 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/environment.yml b/modules/nf-core/jvarkit/vcfpolyx/environment.yml index 980e48236e6..04fc58514be 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/environment.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/environment.yml @@ -2,8 +2,8 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json name: "jvarkit_vcfpolyx" channels: - - conda-forge - bioconda + - conda-forge - defaults dependencies: - "bioconda::jvarkit=2024.08.25" diff --git a/modules/nf-core/jvarkit/vcfpolyx/main.nf b/modules/nf-core/jvarkit/vcfpolyx/main.nf index 6220fe9b163..c0b594953a3 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/main.nf +++ b/modules/nf-core/jvarkit/vcfpolyx/main.nf @@ -1,11 +1,16 @@ +/** + * JVARKIT_VCFPOLYX + * Author: Pierre Lindenbaum PhD + * vcfpolyx is a sub-command of the jvarkit package. It is used to annotate a vcf for the poly-x repeats + */ process JVARKIT_VCFPOLYX { tag "$meta.id" label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_0': - 'biocontainers/jvarkit:2024.08.25--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_1': + 'biocontainers/jvarkit:2024.08.25--hdfd78af_1' }" input: tuple val(meta), path(vcf) @@ -24,7 +29,7 @@ process JVARKIT_VCFPOLYX { script: def args1 = task.ext.args1 ?: '' - def args2 = task.ext.args2 ?: ' --tag POLYX --max-repeats 10 ' + def args2 = meta.vcfpolyx_args ?: (task.ext.args2 ?: ' --tag POLYX --max-repeats 10 ') def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -39,8 +44,6 @@ process JVARKIT_VCFPOLYX { set -o pipefail mkdir -p TMP - which bcftools || true && echo OK - bcftools view -O v ${args1} "${vcf}" |\\ jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx --reference "${fasta}" ${args2} |\\ bcftools view --output "${prefix}.${extension}" ${args3} diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index 36e92ed184c..ba1fd8edd80 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -1,55 +1,73 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "jvarkit_vcfpolyx" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: annotate VCF files for poly repeats keywords: - - sort - - example - - genomics + - vcf + - bcf + - annotation + - repeats tools: + - "jvarkit": - ## TODO nf-core: Add a description and other details for the software below description: "Java utilities for Bioinformatics." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" + homepage: "https://github.com/lindenb/jvarkit" + documentation: "https://jvarkit.readthedocs.io/" + tool_dev_url: "https://github.com/lindenb/jvarkit" + doi: "http://dx.doi.org/10.6084/m9.figshare.1425030" licence: ['MIT License'] -## TODO nf-core: Add a description of all of the variables used as input -input: - # Only when we have meta - - meta: - type: map + - "bcftools": description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - ## TODO nf-core: Delete / customise this example input - - bam: + View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - vcf: + type: file + description: Groovy Map containing reference genome information for vcf + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + + - fasta: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Groovy Map containing reference genome information for fai reference fasta file + pattern: "*.fasta" + + - fai: + type: file + description: Groovy Map containing reference genome information for fai + pattern: "*.fasta.fai" + + - dict: + type: file + description: Groovy Map containing reference genome information for GATK sequence dictionary + pattern: "*.dict" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF filtered output file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - csi: + type: file + description: Default VCF file index + pattern: "*.csi" + - tbi: + type: file + description: Alternative VCF file index + pattern: "*.tbi" - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" authors: - "@lindenb" diff --git a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test index b98eba3f93e..72ec3a71473 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test +++ b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test @@ -29,7 +29,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + path(process.out.vcf[0][1]).vcf.variantsMD5, + process.out.versions + ).match() + } ) } diff --git a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap index 71d0668813c..618902b4502 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap +++ b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap @@ -1,32 +1,10 @@ { "sarscov2 - vcf": { "content": [ - { - "0": [ - - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "csi": [ - - ], - "tbi": [ - - ], - "vcf": [ - - ], - "versions": [ - - ] - } + "65a03a6057dc74467c2b7b17230e7f14", + [ + "versions.yml:md5,dcb0bbe3f1b3aa091708acb0fb6361d5" + ] ], "meta": { "nf-test": "0.9.0", @@ -34,4 +12,4 @@ }, "timestamp": "2024-09-03T14:00:13.118369362" } -} \ No newline at end of file +} From 6a32597718ec5bc5b568b14f85bb5f8aab81a17e Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 12:33:51 +0200 Subject: [PATCH 04/16] fix env and meta ? --- modules/nf-core/jvarkit/vcfpolyx/environment.yml | 2 -- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/environment.yml b/modules/nf-core/jvarkit/vcfpolyx/environment.yml index 04fc58514be..9ffce950e45 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/environment.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/environment.yml @@ -3,7 +3,5 @@ name: "jvarkit_vcfpolyx" channels: - bioconda - - conda-forge - - defaults dependencies: - "bioconda::jvarkit=2024.08.25" diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index ba1fd8edd80..af2bb78a55f 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -14,15 +14,15 @@ tools: homepage: "https://github.com/lindenb/jvarkit" documentation: "https://jvarkit.readthedocs.io/" tool_dev_url: "https://github.com/lindenb/jvarkit" - doi: "http://dx.doi.org/10.6084/m9.figshare.1425030" + doi: "10.6084/m9.figshare.1425030" licence: ['MIT License'] - "bcftools": description: | View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF - homepage: http://samtools.github.io/bcftools/bcftools.html - documentation: http://www.htslib.org/doc/bcftools.html - doi: 10.1093/bioinformatics/btp352 + homepage: "http://samtools.github.io/bcftools/bcftools.html" + documentation: "http://www.htslib.org/doc/bcftools.html" + doi: "10.1093/bioinformatics/btp352" licence: ["MIT"] input: From 19a607992a77f36289f76a1de2b7960957241d64 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 12:40:22 +0200 Subject: [PATCH 05/16] remove tab --- modules/nf-core/jvarkit/vcfpolyx/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/main.nf b/modules/nf-core/jvarkit/vcfpolyx/main.nf index c0b594953a3..6afc27421ec 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/main.nf +++ b/modules/nf-core/jvarkit/vcfpolyx/main.nf @@ -45,9 +45,9 @@ process JVARKIT_VCFPOLYX { mkdir -p TMP bcftools view -O v ${args1} "${vcf}" |\\ - jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx --reference "${fasta}" ${args2} |\\ + jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx --reference "${fasta}" ${args2} |\\ bcftools view --output "${prefix}.${extension}" ${args3} - + rm -rf TMP cat <<-END_VERSIONS > versions.yml From aa0eaeee267000e1a1b69e0d76e20f16d8bdbfe1 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 12:48:04 +0200 Subject: [PATCH 06/16] remove ws --- modules/nf-core/jvarkit/vcfpolyx/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/main.nf b/modules/nf-core/jvarkit/vcfpolyx/main.nf index 6afc27421ec..ae4cec244d2 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/main.nf +++ b/modules/nf-core/jvarkit/vcfpolyx/main.nf @@ -43,11 +43,11 @@ process JVARKIT_VCFPOLYX { """ set -o pipefail mkdir -p TMP - + bcftools view -O v ${args1} "${vcf}" |\\ jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx --reference "${fasta}" ${args2} |\\ bcftools view --output "${prefix}.${extension}" ${args3} - + rm -rf TMP cat <<-END_VERSIONS > versions.yml From dceb14df6faa9634b97351e6f1ef71edf7597fe3 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 5 Sep 2024 10:38:08 +0000 Subject: [PATCH 07/16] [automated] Fix linting with Prettier --- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index af2bb78a55f..250168894cc 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -8,14 +8,13 @@ keywords: - annotation - repeats tools: - - "jvarkit": description: "Java utilities for Bioinformatics." homepage: "https://github.com/lindenb/jvarkit" documentation: "https://jvarkit.readthedocs.io/" tool_dev_url: "https://github.com/lindenb/jvarkit" doi: "10.6084/m9.figshare.1425030" - licence: ['MIT License'] + licence: ["MIT License"] - "bcftools": description: | @@ -28,22 +27,22 @@ tools: input: - vcf: type: file - description: Groovy Map containing reference genome information for vcf + description: Groovy Map containing reference genome information for vcf pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" - fasta: type: file - description: Groovy Map containing reference genome information for fai reference fasta file + description: Groovy Map containing reference genome information for fai reference fasta file pattern: "*.fasta" - fai: type: file - description: Groovy Map containing reference genome information for fai + description: Groovy Map containing reference genome information for fai pattern: "*.fasta.fai" - dict: type: file - description: Groovy Map containing reference genome information for GATK sequence dictionary + description: Groovy Map containing reference genome information for GATK sequence dictionary pattern: "*.dict" output: From eef5c98b4c728c758a798e01cf59fdcec9e3cfc0 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 12:55:09 +0200 Subject: [PATCH 08/16] fix conda ? --- modules/nf-core/jvarkit/vcfpolyx/environment.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/nf-core/jvarkit/vcfpolyx/environment.yml b/modules/nf-core/jvarkit/vcfpolyx/environment.yml index 9ffce950e45..04fc58514be 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/environment.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/environment.yml @@ -3,5 +3,7 @@ name: "jvarkit_vcfpolyx" channels: - bioconda + - conda-forge + - defaults dependencies: - "bioconda::jvarkit=2024.08.25" From 801698a94784ae17b0c47c671e1a9cf518bec6ce Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 13:48:32 +0200 Subject: [PATCH 09/16] remove defaults --- modules/nf-core/jvarkit/vcfpolyx/environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/environment.yml b/modules/nf-core/jvarkit/vcfpolyx/environment.yml index 04fc58514be..cf7e84012ee 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/environment.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/environment.yml @@ -4,6 +4,5 @@ name: "jvarkit_vcfpolyx" channels: - bioconda - conda-forge - - defaults dependencies: - "bioconda::jvarkit=2024.08.25" From 797639a885e32b143e20e13fdd59302b0613f1eb Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 14:02:42 +0200 Subject: [PATCH 10/16] fix meta info --- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index 250168894cc..bb915a543cd 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -25,6 +25,22 @@ tools: licence: ["MIT"] input: + -meta: + description: | + Groovy Map containing VCF information + + -meta2: + description: | + Groovy Map containing fasta information + + -meta2: + description: | + Groovy Map containing fasta.fai information + + -meta3: + description: | + Groovy Map containing fasta.dict information + - vcf: type: file description: Groovy Map containing reference genome information for vcf @@ -49,7 +65,7 @@ output: - meta: type: map description: | - Groovy Map containing sample information + Groovy Map containing VCF information e.g. [ id:'test', single_end:false ] - vcf: type: file From 0acd7af7984c828d5b8a68f09d93f1a51635136b Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 14:13:10 +0200 Subject: [PATCH 11/16] I hate myslef, fix warnings --- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index bb915a543cd..d3d56b75bc1 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -25,19 +25,19 @@ tools: licence: ["MIT"] input: - -meta: + - meta: description: | Groovy Map containing VCF information - -meta2: + - meta2: description: | Groovy Map containing fasta information - -meta2: + - meta2: description: | Groovy Map containing fasta.fai information - -meta3: + - meta3: description: | Groovy Map containing fasta.dict information From 4064e3c60f02fc142ef7ef440b5b74c286e29ba5 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 14:19:15 +0200 Subject: [PATCH 12/16] add type to meta --- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index d3d56b75bc1..10bcfdd1188 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -26,18 +26,22 @@ tools: input: - meta: + type: map description: | Groovy Map containing VCF information - meta2: + type: map description: | Groovy Map containing fasta information - meta2: + type: map description: | Groovy Map containing fasta.fai information - meta3: + type: map description: | Groovy Map containing fasta.dict information From 67e6c915396b6e8a190c8c2228fd9eee21610575 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 14:51:30 +0200 Subject: [PATCH 13/16] fix snap md5 --- modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap index 618902b4502..e5ccf191472 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap +++ b/modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap @@ -3,7 +3,7 @@ "content": [ "65a03a6057dc74467c2b7b17230e7f14", [ - "versions.yml:md5,dcb0bbe3f1b3aa091708acb0fb6361d5" + "versions.yml:md5,b3c351a56da9062295ef90011a9cd48c" ] ], "meta": { From 75b3a9afeead6f97fabf05f75b64c616d3123801 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 15:19:50 +0200 Subject: [PATCH 14/16] fix meta3/meta4 https://github.com/nf-core/modules/pull/6580#discussion_r1745494016 --- modules/nf-core/jvarkit/vcfpolyx/meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/meta.yml b/modules/nf-core/jvarkit/vcfpolyx/meta.yml index 10bcfdd1188..221f7e9ca51 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/meta.yml +++ b/modules/nf-core/jvarkit/vcfpolyx/meta.yml @@ -35,12 +35,12 @@ input: description: | Groovy Map containing fasta information - - meta2: + - meta3: type: map description: | Groovy Map containing fasta.fai information - - meta3: + - meta4: type: map description: | Groovy Map containing fasta.dict information From 8b2115a740fde92a9bc612a9a9361a6251429cf4 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 5 Sep 2024 15:34:11 +0200 Subject: [PATCH 15/16] fix https://github.com/nf-core/modules/pull/6580#discussion_r1745463698 --- modules/nf-core/jvarkit/vcfpolyx/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/main.nf b/modules/nf-core/jvarkit/vcfpolyx/main.nf index ae4cec244d2..9ae4269bae0 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/main.nf +++ b/modules/nf-core/jvarkit/vcfpolyx/main.nf @@ -41,7 +41,6 @@ process JVARKIT_VCFPOLYX { if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" """ - set -o pipefail mkdir -p TMP bcftools view -O v ${args1} "${vcf}" |\\ From da213ab4bbe95a8ca37cd42f7ff4d3a66bf1cfd0 Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum <33838+lindenb@users.noreply.github.com> Date: Thu, 5 Sep 2024 15:40:56 +0200 Subject: [PATCH 16/16] Update modules/nf-core/jvarkit/vcfpolyx/main.nf Co-authored-by: Maxime U Garcia --- modules/nf-core/jvarkit/vcfpolyx/main.nf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules/nf-core/jvarkit/vcfpolyx/main.nf b/modules/nf-core/jvarkit/vcfpolyx/main.nf index 9ae4269bae0..77005f6fc46 100644 --- a/modules/nf-core/jvarkit/vcfpolyx/main.nf +++ b/modules/nf-core/jvarkit/vcfpolyx/main.nf @@ -1,8 +1,3 @@ -/** - * JVARKIT_VCFPOLYX - * Author: Pierre Lindenbaum PhD - * vcfpolyx is a sub-command of the jvarkit package. It is used to annotate a vcf for the poly-x repeats - */ process JVARKIT_VCFPOLYX { tag "$meta.id" label 'process_single'