From ce819e31203cf871b2f20213763cac94c360048e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 26 Jul 2023 12:25:24 +0200 Subject: [PATCH 01/21] adding pon with mutect2 --- conf/igenomes.config | 1 - modules.json | 24 ++++ .../gatk4/createsomaticpanelofnormals/main.nf | 47 ++++++++ .../createsomaticpanelofnormals/meta.yml | 70 ++++++++++++ .../nf-core/gatk4/genomicsdbimport/main.nf | 103 +++++++++++++++++ .../nf-core/gatk4/genomicsdbimport/meta.yml | 84 ++++++++++++++ modules/nf-core/gatk4/mutect2/main.nf | 74 ++++++++++++ modules/nf-core/gatk4/mutect2/meta.yml | 105 ++++++++++++++++++ .../nf-core/bam_create_som_pon_gatk/main.nf | 69 ++++++++++++ .../nf-core/bam_create_som_pon_gatk/meta.yml | 67 +++++++++++ workflows/createpanelrefs.nf | 17 ++- 11 files changed, 657 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml create mode 100644 modules/nf-core/gatk4/genomicsdbimport/main.nf create mode 100644 modules/nf-core/gatk4/genomicsdbimport/meta.yml create mode 100644 modules/nf-core/gatk4/mutect2/main.nf create mode 100644 modules/nf-core/gatk4/mutect2/meta.yml create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/main.nf create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml diff --git a/conf/igenomes.config b/conf/igenomes.config index 614ef61..73e84d9 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -280,7 +280,6 @@ params { 'GATK.GRCh38' { bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" dbsnp_tbi = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" diff --git a/modules.json b/modules.json index 29e931c..7f606c6 100644 --- a/modules.json +++ b/modules.json @@ -15,12 +15,36 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "gatk4/createsomaticpanelofnormals": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["bam_create_som_pon_gatk"] + }, + "gatk4/genomicsdbimport": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["bam_create_som_pon_gatk"] + }, + "gatk4/mutect2": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["bam_create_som_pon_gatk"] + }, "multiqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "bam_create_som_pon_gatk": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf new file mode 100644 index 0000000..e5557c7 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf @@ -0,0 +1,47 @@ +process GATK4_CREATESOMATICPANELOFNORMALS { + tag "$meta.id" + label 'process_low' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(genomicsdb) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" CreateSomaticPanelOfNormals \\ + --variant gendb://$genomicsdb \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml new file mode 100644 index 0000000..2f49cf2 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -0,0 +1,70 @@ +name: gatk4_createsomaticpanelofnormals +description: Create a panel of normals contraining germline and artifactual sites for use with mutect2. +keywords: + - gatk4 + - createsomaticpanelofnormals + - panelofnormals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - genoomicsdb: + type: directory + description: genomicsDB workspace that contains the samples to create the somatic panel of normals with. + pattern: "*_genomicsDBworkspace" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - vcf: + type: file + description: panel of normal as compressed vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Tabix index of vcf file + pattern: "*vcf.gz.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf new file mode 100644 index 0000000..dc77345 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -0,0 +1,103 @@ +process GATK4_GENOMICSDBIMPORT { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) + val run_intlist + val run_updatewspace + val input_map + + output: + tuple val(meta), path("$prefix") , optional:true, emit: genomicsdb + tuple val(meta), path("$updated_db") , optional:true, emit: updatedb + tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + // settings for running default create gendb mode + input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect(){"--variant $it"}.join(' ') + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" GenomicsDBImport \\ + $input_command \\ + $genomicsdb_command \\ + $interval_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def stub_genomicsdb = genomicsdb_command == "--genomicsdb-workspace-path ${prefix}" ? "touch ${prefix}" : "" + def stub_interval = interval_command == "--output-interval-list-to-file ${prefix}.interval_list" ? "touch ${prefix}.interval_list" : "" + def stub_update = updated_db != "" ? "touch ${wspace}" : "" + + """ + ${stub_genomicsdb} + ${stub_interval} + ${stub_update} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml new file mode 100644 index 0000000..af626cb --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -0,0 +1,84 @@ +name: gatk4_genomicsdbimport +description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. 
+keywords: + - gatk4 + - genomicsdbimport + - genomicsdb + - panelofnormalscreation + - jointgenotyping +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. + pattern: "*.vcf.gz" + + - tbi: + type: list + description: list of tbi files that match with the input vcf files + pattern: "*.vcf.gz_tbi" + + - wspace: + type: path + description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. + pattern: "/path/to/existing/gendb" + + - intervalfile: + type: file + description: file containing the intervals to be used when creating the genomicsdb + pattern: "*.interval_list" + + - intervalval: + type: string + description: if an intervals file has not been spcified, the value enetered here will be used as an interval via the "-L" argument + pattern: "example: chr1:1000-10000" + + - run_intlist: + type: boolean + description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. + pattern: "true/false" + + - run_updatewspace: + type: boolean + description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. 
+ pattern: "true/false" + + - input_map: + type: boolean + description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. + pattern: "*.sample_map" + +output: + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db + pattern: "*/$prefix" + - updatedb: + type: directory + description: Directory containing the files that compose the updated genomicsdb workspace, this is only output for update mode, and should be the same path as the input wspace. + pattern: "same/path/as/wspace" + - intervallist: + type: file + description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode. + pattern: "*.interval_list" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf new file mode 100644 index 0000000..bddc368 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -0,0 +1,74 @@ +process GATK4_MUTECT2 { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + path(germline_resource) + path(germline_resource_tbi) + path(panel_of_normals) + path(panel_of_normals_tbi) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + tuple val(meta), path("*.stats") , emit: stats + tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = input.collect{ "--input $it"}.join(" ") + def interval_command = intervals ? "--intervals $intervals" : "" + def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : "" + def gr_command = germline_resource ? "--germline-resource $germline_resource" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" Mutect2 \\ + $inputs \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $pon_command \\ + $gr_command \\ + $interval_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.stats + touch ${prefix}.f1r2.tar.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml new file mode 100644 index 0000000..4842c22 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -0,0 +1,105 @@ +name: gatk4_mutect2 +description: Call somatic SNVs and indels via local assembly of haplotypes. +keywords: + - gatk4 + - mutect2 + - haplotype + - somatic +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - input: + type: list + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam/cram}" + - input_index: + type: list + description: list of BAM file indexes, also able to take CRAM indexes as an input + pattern: "*.{bam.bai/cram.crai}" + - intervals: + type: file + description: Specify region the tools is run on. + pattern: ".{bed,interval_list}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. + pattern: "*.vcf.gz.tbi" + +output: + - vcf: + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + - stats: + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - f1r2: + type: file + description: file containing information to be passed to LearnReadOrientationModel (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf new file mode 100644 index 0000000..a4b2c2c --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf @@ -0,0 +1,69 @@ +// +// Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals +// + +include { GATK4_MUTECT2 } from 
'../../../modules/nf-core/gatk4/mutect2/main' +include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main' +include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createsomaticpanelofnormals/main' + +workflow BAM_CREATE_SOM_PON_GATK { + take: + ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fai // channel: [ val(meta), path(fai) ] + ch_dict // channel: [ val(meta), path(dict) ] + val_pon_norm // string: name for panel of normals + ch_gendb_intervals // channel: [ path(interval_file) ] + + main: + ch_versions = Channel.empty() + ch_input = ch_mutect2_in + + // + // Perform variant calling for each sample using mutect2 module in panel of normals mode. + // + GATK4_MUTECT2 ( + ch_input, + ch_fasta, + ch_fai, + ch_dict, + [], + [], + [], + [] + ) + ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first()) + + // + // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport. + // + ch_vcf = GATK4_MUTECT2.out.vcf.collect{it[1]}.toList() + ch_index = GATK4_MUTECT2.out.tbi.collect{it[1]}.toList() + ch_dict_gendb = ch_dict.map{meta, dict -> return dict}.toList() + + ch_gendb_input = Channel.of([id:val_pon_norm]) + .combine(ch_vcf) + .combine(ch_index) + .combine(ch_gendb_intervals) + .combine(ch_dict_gendb) + .map{meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict]} + + GATK4_GENOMICSDBIMPORT ( ch_gendb_input, false, false, false ) + ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first()) + + // + //Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals. 
+ // + GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict ) + ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first()) + + emit: + mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] + mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] + mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] + genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] + pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] + pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml new file mode 100644 index 0000000..e682f7e --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_create_som_pon_gatk +description: Perform variant calling on a set of normal samples using mutect2 panel of normals mode. Group them into a genomicsdbworkspace using genomicsdbimport, then use this to create a panel of normals using createsomaticpanelofnormals. 
+keywords: + - gatk4 + - mutect2 + - genomicsdbimport + - createsomaticpanelofnormals + - variant_calling + - genomicsdb_workspace + - panel_of_normals +modules: + - gatk4/mutect2 + - gatk4/genomicsdbimport + - gatk4/createsomaticpanelofnormals +input: + - ch_mutect2_in: + type: list + description: | + An input channel containing the following files: + - input: One or more BAM/CRAM files + - input_index: The index/indices from the BAM/CRAM file(s) + - interval_file: An interval file to be used with the mutect call + Structure: [ meta, input, input_index, interval_file ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mutect2_vcf: + type: list + description: List of compressed vcf files to be used to make the gendb workspace + pattern: "[ *.vcf.gz ]" + - mutect2_index: + type: list + description: List of indexes of mutect2_vcf files + pattern: "[ *vcf.gz.tbi ]" + - mutect2_stats: + type: list + description: List of stats files that pair with mutect2_vcf files + pattern: "[ *vcf.gz.stats ]" + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace. 
+ pattern: "path/name_of_workspace" + - pon_vcf: + type: file + description: Panel of normal as compressed vcf file + pattern: "*.vcf.gz" + - pon_index: + type: file + description: Index of pon_vcf file + pattern: "*vcf.gz.tbi" +authors: + - "@GCJMackenzie" diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 11b7c6e..2e1dc58 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -18,7 +18,10 @@ WorkflowCreatepanelrefs.initialise(params, log) // Check input path parameters to see if they exist def checkPathParamList = [ - params.fasta + params.dict, + params.fasta, + params.fasta_fai, + params.input ] /* @@ -46,7 +49,9 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() +ch_dict = params.dict ? Channel.fromPath(params.dict).first() : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() +ch_fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).first() : Channel.empty() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -82,6 +87,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -97,10 +103,15 @@ workflow CREATEPANELREFS { ch_versions = Channel.empty() if (params.tools && params.tools.split(',').contains('cnvkit')) { - CNVKIT_BATCH ( ch_input.bam.map{meta, bam -> [ meta, [], bam ]}, ch_fasta, [], [], [], true ) + CNVKIT_BATCH ( ch_input.bam.map{ meta, bam -> [ meta, [], bam ]}, ch_fasta, [], [], [], true ) ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } + if (params.tools && params.tools.split(',').contains('mutect2')) { + BAM_CREATE_SOM_PON_GATK ( ch_input.cram.map{ meta, cram -> [ meta, cram, [], [] ]}, ch_fasta.map{ fasta -> [ [ id:'fasta' ], fasta ] }, ch_fasta_fai.map{ fai -> [ [ id:'fai' ], fai ] }, ch_dict.map{ dict -> [ [ id:'dict' ], dict ] }, params.pon_name, [] ) + ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From caf4c90cb9d8f4b3bf282dd20a6a4599452ab40f Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:10:04 +0200 Subject: [PATCH 02/21] update bam_create_som_pon_gatk subworkflow --- modules.json | 51 ++---- .../environment.yml | 7 + .../gatk4/createsomaticpanelofnormals/main.nf | 9 +- .../createsomaticpanelofnormals/meta.yml | 7 +- .../gatk4/genomicsdbimport/environment.yml | 7 + .../nf-core/gatk4/genomicsdbimport/main.nf | 9 +- .../nf-core/gatk4/genomicsdbimport/meta.yml | 22 +-- .../gatk4/genomicsdbimport/tests/main.nf.test | 155 ++++++++++++++++++ .../genomicsdbimport/tests/main.nf.test.snap | 40 +++++ .../genomicsdbimport/tests/nextflow.config | 2 + .../gatk4/genomicsdbimport/tests/tags.yml | 3 + modules/nf-core/gatk4/mutect2/environment.yml | 7 + 
modules/nf-core/gatk4/mutect2/main.nf | 9 +- modules/nf-core/gatk4/mutect2/meta.yml | 10 +- .../nf-core/bam_create_som_pon_gatk/meta.yml | 4 +- .../tests/main.nf.test | 48 ++++++ .../tests/main.nf.test.snap | 52 ++++++ .../tests/nextflow.config | 5 + .../bam_create_som_pon_gatk/tests/tags.yml | 2 + 19 files changed, 378 insertions(+), 71 deletions(-) create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml create mode 100644 modules/nf-core/gatk4/genomicsdbimport/environment.yml create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml create mode 100644 modules/nf-core/gatk4/mutect2/environment.yml create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml diff --git a/modules.json b/modules.json index 582b9e5..57887c1 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/createsomaticpanelofnormals": { + "branch": "master", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "installed_by": ["bam_create_som_pon_gatk"] + }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -40,35 +45,30 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "gatk4/germlinecnvcaller": { + "gatk4/genomicsdbimport": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] 
+ "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["bam_create_som_pon_gatk"] }, - "gatk4/intervallisttools": { + "gatk4/germlinecnvcaller": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "gatk4/preprocessintervals": { + "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "gatk4/createsomaticpanelofnormals": { - "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["bam_create_som_pon_gatk"] - }, - "gatk4/genomicsdbimport": { + "gatk4/mutect2": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["bam_create_som_pon_gatk"] }, - "gatk4/mutect2": { + "gatk4/preprocessintervals": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["bam_create_som_pon_gatk"] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "multiqc": { "branch": "master", @@ -92,30 +92,11 @@ } } }, - "subworkflows": { - "nf-core": { - "utils_nextflow_pipeline": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - }, - "utils_nfcore_pipeline": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - }, - "utils_nfvalidation_plugin": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - } - } - }, "subworkflows": { "nf-core": { "bam_create_som_pon_gatk": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml 
b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml new file mode 100644 index 0000000..ae543c6 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_createsomaticpanelofnormals +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf index e5557c7..27a50dc 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf @@ -2,10 +2,10 @@ process GATK4_CREATESOMATICPANELOFNORMALS { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(genomicsdb) @@ -32,7 +32,8 @@ process GATK4_CREATESOMATICPANELOFNORMALS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" CreateSomaticPanelOfNormals \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSomaticPanelOfNormals \\ --variant gendb://$genomicsdb \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml index 2f49cf2..9c3ee19 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -1,8 +1,8 @@ name: gatk4_createsomaticpanelofnormals description: Create a panel of normals contraining germline and 
artifactual sites for use with mutect2. keywords: - - gatk4 - createsomaticpanelofnormals + - gatk4 - panelofnormals tools: - gatk4: @@ -13,7 +13,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -51,7 +50,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - vcf: type: file @@ -65,6 +63,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml new file mode 100644 index 0000000..a3a1363 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_genomicsdbimport +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf index dc77345..6f1d4c5 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/main.nf +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -2,10 +2,10 @@ process GATK4_GENOMICSDBIMPORT { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) @@ -53,7 +53,8 @@ process GATK4_GENOMICSDBIMPORT { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" GenomicsDBImport \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GenomicsDBImport \\ $input_command \\ $genomicsdb_command \\ $interval_command \\ diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml index af626cb..11e565b 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/meta.yml +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -2,10 +2,10 @@ name: gatk4_genomicsdbimport description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. keywords: - gatk4 - - genomicsdbimport - genomicsdb - - panelofnormalscreation + - genomicsdbimport - jointgenotyping + - panelofnormalscreation tools: - gatk4: description: | @@ -15,7 +15,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -26,42 +25,34 @@ input: type: list description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. pattern: "*.vcf.gz" - - tbi: type: list description: list of tbi files that match with the input vcf files pattern: "*.vcf.gz_tbi" - - wspace: - type: path + type: file description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. 
pattern: "/path/to/existing/gendb" - - - intervalfile: + - interval_file: type: file description: file containing the intervals to be used when creating the genomicsdb pattern: "*.interval_list" - - - intervalval: + - interval_value: type: string description: if an intervals file has not been spcified, the value enetered here will be used as an interval via the "-L" argument pattern: "example: chr1:1000-10000" - - run_intlist: type: boolean description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. pattern: "true/false" - - run_updatewspace: type: boolean description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. pattern: "true/false" - - input_map: type: boolean description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. pattern: "*.sample_map" - output: - genomicsdb: type: directory @@ -79,6 +70,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test new file mode 100644 index 0000000..9c207b3 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_process { + + name "Test Process GATK4_GENOMICSDBIMPORT" + script "../main.nf" + process "GATK4_GENOMICSDBIMPORT" + + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "gatk4" + tag "gatk4/genomicsdbimport" + + test("test_gatk4_genomicsdbimport_create_genomicsdb") { + + when { + process { + """ + // [meta, vcf, tbi, interval, interval_value, workspace ] + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , 
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] + // run_intlist + input[1] = false + // run_updatewspace + input[2] = false + // input_map + input[3] = false + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_get_intervalslist") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz', checkIfExists: true) ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ [id:"test"], [], [], [], []]).combine(UNTAR.out.untar.map{ it[1] }) + // run_intlist + input[1] = true + // run_updatewspace + input[2] = false + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + { assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_update_genomicsdb") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz', checkIfExists: true) ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ [id:"test"], file( 
params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz' , checkIfExists: true), file( params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi' , checkIfExists: true), [], []]).combine(UNTAR.out.untar.map{ it[1] }) + // run_intlist + input[1] = false + // run_updatewspace + input[2] = true + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + { assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_stub") { + + options "-stub" + + when { + process { + """ + // [meta, vcf, tbi, interval, interval_value, workspace ] + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] + // run_intlist + input[1] = false + // run_updatewspace + input[2] = false + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap new file mode 100644 index 0000000..a633bbd --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap @@ -0,0 +1,40 @@ +{ + "test_gatk4_genomicsdbimport_get_intervalslist": { + "content": [ + "test.interval_list:md5,4c85812ac15fc1cd29711a851d23c0bf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": 
"2024-02-28T17:55:03.846241" + }, + "test_gatk4_genomicsdbimport_create_genomicsdb": { + "content": [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T11:22:10.11423157" + }, + "test_gatk4_genomicsdbimport_update_genomicsdb": { + "content": [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T12:46:42.403794676" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config b/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config new file mode 100644 index 0000000..e177a14 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config @@ -0,0 +1,2 @@ +process { +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml b/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml new file mode 100644 index 0000000..8a00857 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml @@ -0,0 +1,3 @@ +gatk4/genomicsdbimport: + - "modules/nf-core/gatk4/genomicsdbimport/**" + - "modules/nf-core/untar/**" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml new file mode 100644 index 0000000..86f4bfa --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_mutect2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index bddc368..79d8d28 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -2,10 +2,10 @@ process GATK4_MUTECT2 { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda 
"${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -42,7 +42,8 @@ process GATK4_MUTECT2 { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" Mutect2 \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + Mutect2 \\ $inputs \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index 4842c22..21c928e 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -2,8 +2,10 @@ name: gatk4_mutect2 description: Call somatic SNVs and indels via local assembly of haplotypes. keywords: - gatk4 - - mutect2 - haplotype + - indels + - mutect2 + - snvs - somatic tools: - gatk4: @@ -15,7 +17,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -77,7 +78,6 @@ input: type: file description: Index for the panel of normals. 
pattern: "*.vcf.gz.tbi" - output: - vcf: type: file @@ -99,7 +99,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml index e682f7e..2660836 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -9,7 +9,7 @@ keywords: - variant_calling - genomicsdb_workspace - panel_of_normals -modules: +components: - gatk4/mutect2 - gatk4/genomicsdbimport - gatk4/createsomaticpanelofnormals @@ -65,3 +65,5 @@ output: pattern: "*vcf.gz.tbi" authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test new file mode 100644 index 0000000..3efffd4 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test @@ -0,0 +1,48 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_CREATE_SOM_PON_GATK" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_CREATE_SOM_PON_GATK" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_create_som_pon_gatk" + tag "gatk4" + tag "gatk4/mutect2" + tag "gatk4/genomicsdbimport" + tag "gatk4/createsomaticpanelofnormals" + + test("test_create_som_pon_gatk_bam") { + when { + workflow { + """ + // ch_mutect2_in + input[0] = Channel.of([[ id:'test1' ], file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),[]],[[ id:'test2' ], file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), []]) + // ch_fasta + input[1] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)]) + // ch_fai + input[2] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)]) + // ch_dict + input[3] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists:true)]) + // str_pon_norm + input[4] = "test_panel" + // ch_interval_file + input[5] = Channel.value(file(params.test_data['homo_sapiens']['genome']['genome_21_interval_list'], checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file(workflow.out.mutect2_vcf.get(0).get(1)).name).match("test1.vcf.gz") }, + { assert snapshot(file(workflow.out.mutect2_index.get(0).get(1)).name).match("test1.vcf.gz.tbi") }, + { assert snapshot(file(workflow.out.mutect2_stats.get(0).get(1)).name).match("test1.vcf.gz.stats") }, + { assert snapshot(file(workflow.out.pon_vcf.get(0).get(1)).name).match("test_panel.vcf.gz") }, + { assert snapshot(file(workflow.out.pon_index.get(0).get(1)).name).match("test_panel.vcf.gz.tbi") }, + ) + } + } +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap new file mode 100644 index 0000000..4c0d88a --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "test_panel.vcf.gz": { + "content": [ + "test_panel.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.103667303" + }, + "test1.vcf.gz.stats": { + "content": [ + "test1.vcf.gz.stats" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-02-14T06:59:54.102164313" + }, + "test_panel.vcf.gz.tbi": { + "content": [ + "test_panel.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.105382853" + }, + "test1.vcf.gz": { + "content": [ + "test1.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.098085724" + }, + "test1.vcf.gz.tbi": { + "content": [ + "test1.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.100765684" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config new file mode 100644 index 0000000..6a98618 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_MUTECT2 { + ext.args = "--max-mnp-distance 0" + } +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml new file mode 100644 index 0000000..bb1b93c --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_create_som_pon_gatk: + - subworkflows/nf-core/bam_create_som_pon_gatk/** From 6cfbc3c2226257299b7594a545f0c191e19de5c8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:13:03 +0200 Subject: [PATCH 03/21] update and retore utils subworkflows --- modules.json | 15 +++++++++++++++ .../utils_nextflow_pipeline/tests/nextflow.config | 2 +- .../utils_nfcore_pipeline/tests/nextflow.config | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 57887c1..b4c5bb7 100644 --- a/modules.json +++ b/modules.json @@ -98,6 +98,21 @@ "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["subworkflows"] + }, + 
"utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] } } } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config index 0fa4aba..d0a926b 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=23.10.0' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config index 0fa4aba..d0a926b 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=23.10.0' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } From 2c0789ae945505f396cd0c42918af815401208a8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:15:37 +0200 Subject: [PATCH 04/21] code polish --- workflows/createpanelrefs.nf | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 3229777..9903875 100644 --- 
a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -27,25 +27,26 @@ include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvc include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' +// Initialize file channels based on params, defined in the params.genomes[params.genome] scope +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() + : Channel.value([[:],[]]) +ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() + : Channel.value([[:],[]]) +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() + : Channel.empty() +ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_dict = params.dict ? 
Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -57,7 +58,6 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.mu ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW From a0c8de730b2c89a6d5a6b1aa7ad1d3c702155d47 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:30:02 +0200 Subject: [PATCH 05/21] update schema --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1a2daee..6a96e6e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -198,8 +198,8 @@ "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for building Panel of Normals or models.", - "help_text": "Multiple tools separated with commas.\n\nTools available: CNVKIT,germlinecnvcaller", - "pattern": "^((cnvkit|germlinecnvcaller|gens)?,?)*(? Date: Wed, 3 Apr 2024 14:30:40 +0200 Subject: [PATCH 06/21] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index abde31e..446d7b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - `CNVKIT` can be used to create a PON - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-validation - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-test +- [#8](https://github.com/nf-core/createpanelrefs/pull/8) - `Mutect2` can be used to create a PON ### `Fixed` From 2d5c053f28c09333cfbc064126adb645531d34fa Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:01:24 +0200 Subject: [PATCH 07/21] add pon_name to schema --- nextflow.config | 216 ++++++++++++++++++----------------- nextflow_schema.json | 14 +++ workflows/createpanelrefs.nf | 58 +++++----- 3 files changed, 154 insertions(+), 134 
deletions(-) diff --git a/nextflow.config b/nextflow.config index 7f0a4d1..8a46860 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,18 +10,22 @@ params { // Input options - input = null + input = null + // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false - fasta = null - fai = null - dict = null + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + fasta = null + fai = null + dict = null // Building Panel of Normals and models tools = null // No default, must be specified + // Mutect2 options + pon_name = null + // Germlinecnvcaller options gcnv_analysis_type = 'wgs' gcnv_bin_length = 1000 @@ -45,36 +49,36 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' // Schema validation default options - validationFailUnrecognisedParams = false - 
validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' - validationShowHiddenParams = false - validate_params = true + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true } @@ -97,95 +101,95 @@ try { // } profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id 
-u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMoun = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - 
charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } @@ -194,10 +198,10 @@ profiles { // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' // Nextflow plugins plugins { @@ -215,10 +219,10 @@ if (!params.igenomes_ignore) { // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Capture exit codes from upstream processes when piping diff --git a/nextflow_schema.json b/nextflow_schema.json index 6a96e6e..4cbdeb3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -142,6 +142,17 @@ } } }, + "mutect2_options": { + "title": "Mutect2 options", + "type": "object", + "description": "Options used by the mutect2 subworkflow", + "default": "", + "properties": { + "pon_name": { + "type": "string" + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -458,6 +469,9 @@ { "$ref": "#/definitions/cnvkit_options" }, + { + "$ref": "#/definitions/mutect2_options" + }, { "$ref": "#/definitions/input_output_options" }, diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 9903875..05fd117 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -17,6 +17,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crea include { GENS_PON } from '../subworkflows/local/gens_pon' include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -28,24 +29,24 @@ include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/ma include { MULTIQC } from '../modules/nf-core/multiqc/main' // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_cnvkit_targets = params.cnvkit_targets ? 
Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() : Channel.empty() -ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() +ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() : Channel.empty() ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() : Channel.empty() -ch_target_bed = params.target_bed ? 
Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) +ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -76,10 +77,7 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('cnvkit')) { ch_samplesheet - .map{ meta, bam, bai, cram, crai -> - new_meta = meta + [id:"panel"] - [new_meta, bam] - } + .map{ meta, bam, bai, cram, crai -> [meta + [id:'panel'], bam]} .groupTuple() .map {meta, bam -> [ meta, [], bam ]} .set { ch_cnvkit_input } @@ -92,8 +90,8 @@ workflow CREATEPANELREFS { ch_samplesheet .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + [data_type:'cram'], cram, crai ] } .set { ch_germlinecnvcaller_input } @@ -112,14 +110,18 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('mutect2')) { - ch_samplesheet - .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] - } - .set { ch_mutect2_input } + ch_mutect2_input = ch_samplesheet.map{meta, bam, bai, cram, crai -> + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + 
[data_type:'cram'], cram, crai ] + } + + BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, + ch_fasta, + ch_fai, + ch_dict, + params.pon_name, + ch_target_bed.map{ meta, bed -> [ bed ] }) - BAM_CREATE_SOM_PON_GATK ( ch_mutect2_input.map{ meta, cram -> [ meta, bam ]}, ch_fasta, ch_fai, ch_dict, params.pon_name, [] ) ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) } @@ -128,8 +130,8 @@ workflow CREATEPANELREFS { ch_samplesheet .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + [data_type:'cram'], cram, crai ] } .set { ch_gens_input } From b934a7e81610f92600d7873246aeab75ed03a0b1 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:25:05 +0200 Subject: [PATCH 08/21] fix mutect2 usage --- conf/test.config | 17 ++++++++++------- nextflow.config | 3 ++- tests/csv/1.0.0/bam.csv | 3 +++ tests/csv/1.0.0/cram.csv | 3 --- workflows/createpanelrefs.nf | 2 +- 5 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 tests/csv/1.0.0/bam.csv delete mode 100644 tests/csv/1.0.0/cram.csv diff --git a/conf/test.config b/conf/test.config index 00bbe3b..a044044 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,17 +20,20 @@ params { max_time = '6.h' // Input data - input = "${projectDir}/tests/csv/1.0.0/cram.csv" + input = "${projectDir}/tests/csv/1.0.0/bam.csv" // Main options - tools = 'cnvkit' + tools = 'cnvkit,mutect2' //Germlinecnvcaller options - gcnv_scatter_content = 2 - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_scatter_content = 2 + gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome - genome = 
null - igenomes_ignore = true - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + genome = null + igenomes_ignore = true + dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + target_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" } diff --git a/nextflow.config b/nextflow.config index 8a46860..5ee0d10 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,9 +19,10 @@ params { fasta = null fai = null dict = null + target_bed = null // Building Panel of Normals and models - tools = null // No default, must be specified + tools = null // No default, must be specified // Mutect2 options pon_name = null diff --git a/tests/csv/1.0.0/bam.csv b/tests/csv/1.0.0/bam.csv new file mode 100644 index 0000000..b95f604 --- /dev/null +++ b/tests/csv/1.0.0/bam.csv @@ -0,0 +1,3 @@ +sample,bam,bai +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai +sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv deleted 
file mode 100644 index e132a68..0000000 --- a/tests/csv/1.0.0/cram.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,bam -sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam -sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 05fd117..c260d60 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -115,7 +115,7 @@ workflow CREATEPANELREFS { if (cram) return [ meta + [data_type:'cram'], cram, crai ] } - BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, + BAM_CREATE_SOM_PON_GATK(ch_mutect2_input.map{ meta, reads, index -> [ meta, reads, index, [] ] }, ch_fasta, ch_fai, ch_dict, From 6be5bbb0162ab492c85ed040ae2b069bb12da45d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:28:41 +0200 Subject: [PATCH 09/21] update schema --- nextflow_schema.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 4cbdeb3..f1924de 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -261,6 +261,15 @@ "fa_icon": "fas fa-file", "exists": true, "mimetype": "text/plain" + }, + "target_bed": { + "type": "string", + "description": "Path to target bed file", + "pattern": "^\\S+\\.bed$", + "format": "file-path", + "fa_icon": "fas fa-file", + "exists": true, + "mimetype": "text/plain" } } }, From 5eaf21ff14eb960d0c62c3f793358edf82b435b8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:48:16 +0200 Subject: [PATCH 10/21] update subworkflow and fix tests --- conf/modules/mutect2.config | 19 ++++ conf/test.config | 2 +- modules.json | 88 ++++++++++++++----- nextflow.config | 1 + .../tests/main.nf.test | 20 +++-- workflows/createpanelrefs.nf | 6 +- 6 files changed, 104 insertions(+), 32 deletions(-) create mode 100644 
conf/modules/mutect2.config diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config new file mode 100644 index 0000000..04900ee --- /dev/null +++ b/conf/modules/mutect2.config @@ -0,0 +1,19 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: GATK4_MUTECT2 { + ext.args = "--max-mnp-distance 0" + } + +} diff --git a/conf/test.config b/conf/test.config index a044044..763db91 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,8 +32,8 @@ params { // Small reference genome genome = null igenomes_ignore = true + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" target_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" } diff --git a/modules.json b/modules.json index b4c5bb7..786d019 100644 --- a/modules.json +++ 
b/modules.json @@ -8,87 +8,121 @@ "cnvkit/batch": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/annotateintervals": { "branch": "master", "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/collectreadcounts": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createsomaticpanelofnormals": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["bam_create_som_pon_gatk"] + "installed_by": [ + "bam_create_som_pon_gatk" + ] }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/filterintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["bam_create_som_pon_gatk"] + "installed_by": [ + "bam_create_som_pon_gatk" + ] }, "gatk4/germlinecnvcaller": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": 
[ + "modules" + ] }, "gatk4/mutect2": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["bam_create_som_pon_gatk"] + "installed_by": [ + "bam_create_som_pon_gatk" + ] }, "gatk4/preprocessintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/createsequencedictionary": { "branch": "master", "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -96,26 +130,34 @@ "nf-core": { "bam_create_som_pon_gatk": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["subworkflows"] + "git_sha": "b0783b07beb65cac505fa6202e8f670437637b45", + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 5ee0d10..243473a 
100644 --- a/nextflow.config +++ b/nextflow.config @@ -266,6 +266,7 @@ includeConfig 'conf/modules/base.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/germlinecnvcaller_cohort.config' includeConfig 'conf/modules/gens_pon.config' +includeConfig 'conf/modules/mutect2.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test index 3efffd4..57aee89 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test @@ -19,17 +19,27 @@ nextflow_workflow { workflow { """ // ch_mutect2_in - input[0] = Channel.of([[ id:'test1' ], file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),[]],[[ id:'test2' ], file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), []]) + input[0] = Channel.of([ + [ id:'test1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + [] ], + [ + [ id:'test2' ], + file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + [] ] + ) // ch_fasta - 
input[1] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)]) + input[1] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)]) // ch_fai - input[2] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)]) + input[2] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)]) // ch_dict - input[3] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists:true)]) + input[3] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists:true)]) // str_pon_norm input[4] = "test_panel" // ch_interval_file - input[5] = Channel.value(file(params.test_data['homo_sapiens']['genome']['genome_21_interval_list'], checkIfExists: true)) + input[5] = Channel.value(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)) """ } } diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index c260d60..a5fb940 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -111,11 +111,11 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('mutect2')) { ch_mutect2_input = ch_samplesheet.map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:'bam'], bam, bai ] - if (cram) return [ meta + [data_type:'cram'], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai, [] ] + if (cram) return [ meta + [data_type:'cram'], cram, crai, [] ] } - BAM_CREATE_SOM_PON_GATK(ch_mutect2_input.map{ meta, reads, index -> [ meta, reads, index, [] ] }, + 
BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, ch_fasta, ch_fai, ch_dict, From d676fdb3cb1d475158ccd8677eb14ebc7df3b84e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:57:04 +0200 Subject: [PATCH 11/21] fix default tests with updated config --- conf/test.config | 2 +- tests/pipeline/default.nf.test.snap | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index 763db91..3912e07 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,7 @@ params { input = "${projectDir}/tests/csv/1.0.0/bam.csv" // Main options - tools = 'cnvkit,mutect2' + tools = 'cnvkit' //Germlinecnvcaller options gcnv_scatter_content = 2 diff --git a/tests/pipeline/default.nf.test.snap b/tests/pipeline/default.nf.test.snap index e921a19..4d21008 100644 --- a/tests/pipeline/default.nf.test.snap +++ b/tests/pipeline/default.nf.test.snap @@ -3,16 +3,24 @@ "content": [ "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, "timestamp": "2024-02-21T12:37:23.523857103" }, "cnvkit": { "content": [ - "panel.cnn:md5,07dea67088da689ad04012552c606882", - "test.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test.paired_end.sorted.targetcoverage.cnn:md5,ff526714696aa49bdc1dc8d00d965266", - "test2.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test2.paired_end.sorted.targetcoverage.cnn:md5,6ae6b3fce7299eedca6133d911c38fe1" + "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", + "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + 
"test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" ], - "timestamp": "2023-07-08T16:47:57+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-03T16:55:32.197815" } } \ No newline at end of file From c6afd52fc77cf2a637d0546960b3eb535da2c9e3 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:06:36 +0200 Subject: [PATCH 12/21] add tests for mutect2_pon --- tests/pipeline/mutect2.nf.test | 26 ++++++++++++++++++++++++++ tests/pipeline/mutect2.nf.test.snap | 12 ++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/pipeline/mutect2.nf.test create mode 100644 tests/pipeline/mutect2.nf.test.snap diff --git a/tests/pipeline/mutect2.nf.test b/tests/pipeline/mutect2.nf.test new file mode 100644 index 0000000..ac2e429 --- /dev/null +++ b/tests/pipeline/mutect2.nf.test @@ -0,0 +1,26 @@ +nextflow_pipeline { + + name "Test MUTECT2_PON" + script "main.nf" + tag "MUTECT2" + + test("Run MUTECT2 test") { + + when { + params { + outdir = "$outputDir" + tools = 'mutect2' + pon_name = 'test' + validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/multiqc/").exists() }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") } + ) + } + } +} diff --git a/tests/pipeline/mutect2.nf.test.snap b/tests/pipeline/mutect2.nf.test.snap new file mode 100644 index 0000000..95bd5fc --- /dev/null +++ b/tests/pipeline/mutect2.nf.test.snap @@ -0,0 +1,12 @@ +{ + "software_versions": { + "content": [ + "{GATK4_CREATESOMATICPANELOFNORMALS={gatk4=4.5.0.0}, GATK4_GENOMICSDBIMPORT={gatk4=4.5.0.0}, GATK4_MUTECT2={gatk4=4.5.0.0}, Workflow={nf-core/createpanelrefs=v1.0dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-03T17:06:08.093342" + } +} \ No newline at end of file From 
61be19b29d9aa5de983d251ec222b06b8abf372c Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:07:52 +0200 Subject: [PATCH 13/21] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8cf857e..7d797ff 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ results/ testing* testing/ work/ +.nf-test.log From 3f2124ee792badf8ff5aa77ef1b568904afb61b9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:10:07 +0200 Subject: [PATCH 14/21] prettier --- modules.json | 86 ++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 64 deletions(-) diff --git a/modules.json b/modules.json index 786d019..49a3fcc 100644 --- a/modules.json +++ b/modules.json @@ -8,121 +8,87 @@ "cnvkit/batch": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/annotateintervals": { "branch": "master", "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/collectreadcounts": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createsomaticpanelofnormals": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": [ - "bam_create_som_pon_gatk" - ] + "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": 
[ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/filterintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "bam_create_som_pon_gatk" - ] + "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/germlinecnvcaller": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": [ - "bam_create_som_pon_gatk" - ] + "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/preprocessintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/createsequencedictionary": { "branch": "master", "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -131,33 +97,25 @@ "bam_create_som_pon_gatk": { "branch": "master", "git_sha": "b0783b07beb65cac505fa6202e8f670437637b45", - "installed_by": [ - "subworkflows" - ] + "installed_by": 
["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From b3c21cd71708c841d285c47c3ee1e4ca98b19460 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:21:10 +0200 Subject: [PATCH 15/21] add mutect2 tests to CI --- tests/config/tags.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 3672b33..2deb666 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -10,7 +10,16 @@ default: cnvkit: - conf/modules/cnvkit.config - - modules/nf-core/cnvkit/batch/main.nf + - modules/nf-core/cnvkit/batch/** + - tests/cnvkit.nf.test + +mutect2: + - conf/modules/mutect2.config + - modules/nf-core/gatk4/mutect2/** + - modules/nf-core/gatk4/genomicsdbimport/** + - modules/nf-core/gatk4/createsomaticpanelofnormals/** + - modules/nf-core/cnvkit/batch/** + - subworkflows/nf-core/bam_create_som_pon_gatk/** - tests/cnvkit.nf.test germlinecnvcaller_cohort: From 2a35f8d7c4a3f392ac26a9acc70832380edfcf2b Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Thu, 4 Apr 2024 16:39:13 +0200 Subject: [PATCH 16/21] Update nextflow.config Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 243473a..5acc3bc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -142,7 +142,7 @@ profiles { } 
singularity { singularity.enabled = true - singularity.autoMoun = true + singularity.autoMounts = true conda.enabled = false docker.enabled = false podman.enabled = false From bf5a2d65e0bce96860303927f17f3c8f5baf3acf Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Thu, 4 Apr 2024 16:43:39 +0200 Subject: [PATCH 17/21] Update conf/modules/mutect2.config Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> --- conf/modules/mutect2.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config index 04900ee..9bbbb9d 100644 --- a/conf/modules/mutect2.config +++ b/conf/modules/mutect2.config @@ -12,7 +12,7 @@ process { - withName: GATK4_MUTECT2 { + withName: '.*BAM_CREATE_SOM_PON_GATK:GATK4_MUTECT2' { ext.args = "--max-mnp-distance 0" } From 16d37de26d4d08f3231d9095f6afcff94fa128cd Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 16:41:27 +0200 Subject: [PATCH 18/21] feat: make tools a required params --- nextflow_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index f1924de..4e61ed8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -203,6 +203,7 @@ "title": "Main options", "type": "object", "description": "Most common options used for the pipeline", + "required": ["tools"], "default": "", "properties": { "tools": { From c967164c9947bdc326eff5f3335a54247561aa31 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 16:43:20 +0200 Subject: [PATCH 19/21] feat: pon_name is now mutect2_pon_name --- nextflow.config | 2 +- nextflow_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 5acc3bc..a837e6e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,7 +25,7 @@ params { tools = null // No default, must be specified // Mutect2 options - pon_name = null + mutect2_pon_name = null // Germlinecnvcaller options 
gcnv_analysis_type = 'wgs' diff --git a/nextflow_schema.json b/nextflow_schema.json index 4e61ed8..f03260a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -148,7 +148,7 @@ "description": "Options used by the mutect2 subworkflow", "default": "", "properties": { - "pon_name": { + "mutect2_pon_name": { "type": "string" } } From f05ca82ec7b3291b5324e6ffee2f1afb2b801df4 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 16:44:47 +0200 Subject: [PATCH 20/21] fix: pon_name is mutect2_pon_name in the test too --- tests/pipeline/mutect2.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/mutect2.nf.test b/tests/pipeline/mutect2.nf.test index ac2e429..767e0f9 100644 --- a/tests/pipeline/mutect2.nf.test +++ b/tests/pipeline/mutect2.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { params { outdir = "$outputDir" tools = 'mutect2' - pon_name = 'test' + mutect2_pon_name = 'test' validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' } } From 2a84359f13eb9ac4d474a0b8c656deda39d7e965 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 17:11:35 +0200 Subject: [PATCH 21/21] fix: fix usage of mutect2_pon_name params usage --- workflows/createpanelrefs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index a5fb940..ac8b2be 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -119,7 +119,7 @@ workflow CREATEPANELREFS { ch_fasta, ch_fai, ch_dict, - params.pon_name, + params.mutect2_pon_name, ch_target_bed.map{ meta, bed -> [ bed ] }) ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions)