Skip to content

Commit

Permalink
Merge pull request #19 from ramprasadn/germlinecnvcallerupdate
Browse files Browse the repository at this point in the history
update Germlinecnvcaller subworkflow
  • Loading branch information
maxulysse authored Mar 28, 2024
2 parents bed052e + cdec400 commit 69bccbb
Show file tree
Hide file tree
Showing 12 changed files with 273 additions and 35 deletions.
8 changes: 8 additions & 0 deletions conf/modules/germlinecnvcaller_cohort.config
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ process {
]
}

// Run the target BED -> interval_list conversion only for WES analyses where
// the user gave a target_bed but no ready-made target_interval_list.
withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' {
ext.when = { params.analysis_type.equals("wes") && params.target_interval_list.equals(null) && params.target_bed }
}

// Run the exclude BED -> interval_list conversion only for WES analyses where
// the user gave an exclude_bed but no ready-made exclude_interval_list.
withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_EXCLUDE' {
ext.when = { params.analysis_type.equals("wes") && params.exclude_interval_list.equals(null) && params.exclude_bed }
}

withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' {
ext.args = { ["--imr OVERLAPPING_ONLY",
"--padding ${params.padding}",
Expand Down
16 changes: 12 additions & 4 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,18 @@ If you wish to share such profile (such as upload as supplementary material for

If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below.

| Mandatory | Optional |
| ------------------------- | -------- |
| fasta/genomes | fai |
| ploidy_priors<sup>1</sup> | dict |
| Mandatory | Optional |
| ------------------------- | --------------------------------- |
| fasta/genomes | fai |
| ploidy_priors<sup>1</sup> | dict |
| | target_bed/target_interval_list |
| | exclude_bed/exclude_interval_list |
| | bin_length |
| | mappable_regions |
| | padding |
| | readcount_format |
| | scatter_content |
| | segmental_duplications |

<sup>1</sup> To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.<br />

Expand Down
11 changes: 7 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ nextflow.enable.dsl = 2
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { CREATEPANELREFS } from './workflows/createpanelrefs'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline'
Expand All @@ -31,9 +30,13 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_crea

// This is an example of how to use getGenomeAttribute() to fetch parameters
// from igenomes.config using `--genome`
params.fasta = getGenomeAttribute('fasta')
params.fai = getGenomeAttribute('fai')
params.dict = getGenomeAttribute('dict')
params.fasta = getGenomeAttribute('fasta')
params.fai = getGenomeAttribute('fai')
params.dict = getGenomeAttribute('dict')
params.target_bed = getGenomeAttribute('target_bed')
params.target_interval_list = getGenomeAttribute('target_interval_list')
params.exclude_bed = getGenomeAttribute('exclude_bed')
params.exclude_interval_list = getGenomeAttribute('exclude_interval_list')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
"git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e",
"installed_by": ["modules"]
},
"gatk4/bedtointervallist": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"gatk4/collectreadcounts": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/gatk4/bedtointervallist/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 56 additions & 0 deletions modules/nf-core/gatk4/bedtointervallist/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 51 additions & 0 deletions modules/nf-core/gatk4/bedtointervallist/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ params {
tools = null // No default, must be specified

// Germlinecnvcaller options
analysis_type = 'wgs'
bin_length = 1000
mappable_regions = null
padding = 0
Expand Down
43 changes: 43 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,38 @@
"description": "Options used by the germlinecnvcaller subworkflow",
"default": "",
"properties": {
"analysis_type": {
"type": "string",
"default": "wgs",
"description": "Specifies the analysis type for the pipeline: either 'wgs' or 'wes'.",
"fa_icon": "fas fa-align-center",
"enum": ["wgs", "wes"]
},
"bin_length": {
"type": "number",
"default": 1000,
"description": "Length (in bp) of the bins. If zero, no binning will be performed.",
"fa_icon": "fas fa-sort-numeric-down",
"help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. "
},
"exclude_bed": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\.bed$",
"description": "Path to a bed file containing regions to be excluded from the analysis.",
"help_text": "If the regions you would like to exclude are in bed format, use this option. If you have an interval_list file, use `exclude_interval_list` parameter instead."
},
"exclude_interval_list": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\.interval_list$",
"description": "Path to an interval_list file containing regions to be excluded from the analysis.",
"help_text": "If the regions you would like to exclude are in interval_list format, use this option. If you have a bed file, use the `exclude_bed` parameter instead."
},
"mappable_regions": {
"type": "string",
"exists": true,
Expand Down Expand Up @@ -63,6 +88,24 @@
"format": "file-path",
"fa_icon": "fas fa-file",
"help_text": "Used by GATK's AnnotateIntervals."
},
"target_bed": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\.bed$",
"description": "Path to the target bed file.",
"help_text": "If the regions you would like to analyse are in bed format, use this option. If you have an interval_list file, use `target_interval_list` parameter instead."
},
"target_interval_list": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\.interval_list$",
"description": "Path to the target interval_list file.",
"help_text": "If the regions you would like to analyse are in interval_list format, use this option. If you have a bed file, use `target_bed` parameter instead."
}
}
},
Expand Down
72 changes: 56 additions & 16 deletions subworkflows/local/germlinecnvcaller_cohort.nf
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main'
include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main'
include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main'
include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main'
include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main'
include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main'
include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main'
include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main'
include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main'
include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main'
include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main'
include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main'
include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main'
include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main'
include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main'
include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main'
include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'

workflow GERMLINECNVCALLER_COHORT {
take:
ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ]
ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ]
ch_ploidy_priors // channel: [mandatory] [ path(tsv) ]
ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ]
ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ]
ch_ploidy_priors // channel: [mandatory] [ path(tsv) ]
ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ]
ch_user_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ]
ch_exclude_bed // channel: [mandatory] [ val(meta), path(bed) ]
ch_user_exclude_interval_list // channel: [mandatory] [ val(meta), path(intervals) ]

main:
ch_versions = Channel.empty()
Expand All @@ -37,10 +43,42 @@ workflow GERMLINECNVCALLER_COHORT {
.collect()
.set { ch_fai }

GATK4_BEDTOINTERVALLIST_TARGETS (ch_target_bed, ch_dict) //Runs for wes analysis, when target_bed file is provided instead of target_interval_list
GATK4_BEDTOINTERVALLIST_EXCLUDE (ch_exclude_bed, ch_dict) //Runs for wes analysis, when exclude_bed file is provided instead of exclude_interval_list

ch_user_target_interval_list
.combine(GATK4_BEDTOINTERVALLIST_TARGETS.out.interval_list.ifEmpty(null))
.branch { it ->
intervallistfrompath: it[2].equals(null)
return [it[0], it[1]]
intervallistfrombed: !(it[2].equals(null))
return [it[2], it[3]]
}
.set { ch_targets_for_mix }

ch_targets_for_mix.intervallistfrompath.mix(ch_targets_for_mix.intervallistfrombed)
.collect()
.set {ch_target_interval_list}

ch_user_exclude_interval_list
.combine(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.interval_list.ifEmpty(null))
.branch { it ->
intervallistfrompath: it[2].equals(null)
return [it[0], it[1]]
intervallistfrombed: !(it[2].equals(null))
return [it[2], it[3]]
}
.set { ch_exclude_for_mix }

ch_exclude_for_mix.intervallistfrompath.mix(ch_exclude_for_mix.intervallistfrombed)
.collect()
.set { ch_exclude_interval_list }

GATK4_PREPROCESSINTERVALS ( ch_fasta,
ch_fai,
ch_dict,
[[:],[]], [[:],[]] )
ch_target_interval_list,
ch_exclude_interval_list)

GATK4_ANNOTATEINTERVALS ( GATK4_PREPROCESSINTERVALS.out.interval_list,
ch_fasta,
Expand Down Expand Up @@ -121,6 +159,8 @@ workflow GERMLINECNVCALLER_COHORT {
ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions)
ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_TARGETS.out.versions)
ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.versions)
ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first())
ch_versions = ch_versions.mix(GATK4_ANNOTATEINTERVALS.out.versions)
ch_versions = ch_versions.mix(GATK4_FILTERINTERVALS.out.versions)
Expand Down
4 changes: 4 additions & 0 deletions tests/pipeline/germlinecnvcaller_cohort.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ nextflow_workflow {
[[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]],
[[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]])
input[4] = Channel.fromPath(params.ploidy_priors)
input[5] = Channel.value([[:],[]])
input[6] = Channel.value([[:],[]])
input[7] = Channel.value([[:],[]])
input[8] = Channel.value([[:],[]])
"""
}
}
Expand Down
Loading

0 comments on commit 69bccbb

Please sign in to comment.