Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update Germlinecnvcaller subworkflow #19

Merged
merged 6 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions conf/modules/germlinecnvcaller_cohort.config
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ process {
]
}

withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' {
ext.when = { params.analysis_type.equals("wes") && params.target_interval_list.equals(null) && params.target_bed }
}

withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_EXCLUDE' {
ext.when = { params.analysis_type.equals("wes") && params.exclude_interval_list.equals(null) && params.exclude_bed }
}

withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' {
ext.args = { ["--imr OVERLAPPING_ONLY",
"--padding ${params.padding}",
Expand Down
16 changes: 12 additions & 4 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,18 @@ If you wish to share such profile (such as upload as supplementary material for

If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below.

| Mandatory | Optional |
| ------------------------- | -------- |
| fasta/genomes | fai |
| ploidy_priors<sup>1</sup> | dict |
| Mandatory | Optional |
| ------------------------- | --------------------------------- |
| fasta/genomes | fai |
| ploidy_priors<sup>1</sup> | dict |
| | target_bed/target_interval_list |
| | exclude_bed/exclude_interval_list |
| | bin_length |
| | mappable_regions |
| | padding |
| | readcount_format |
| | scatter_content |
| | segmental_duplications |

<sup>1</sup> To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.<br />

Expand Down
11 changes: 7 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ nextflow.enable.dsl = 2
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { CREATEPANELREFS } from './workflows/createpanelrefs'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline'
Expand All @@ -31,9 +30,13 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_crea

// This is an example of how to use getGenomeAttribute() to fetch parameters
// from igenomes.config using `--genome`
params.fasta = getGenomeAttribute('fasta')
params.fai = getGenomeAttribute('fai')
params.dict = getGenomeAttribute('dict')
params.fasta = getGenomeAttribute('fasta')
params.fai = getGenomeAttribute('fai')
params.dict = getGenomeAttribute('dict')
params.target_bed = getGenomeAttribute('target_bed')
params.target_interval_list = getGenomeAttribute('target_interval_list')
params.exclude_bed = getGenomeAttribute('exclude_bed')
params.exclude_interval_list = getGenomeAttribute('exclude_interval_list')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
"git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e",
"installed_by": ["modules"]
},
"gatk4/bedtointervallist": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"gatk4/collectreadcounts": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/gatk4/bedtointervallist/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 56 additions & 0 deletions modules/nf-core/gatk4/bedtointervallist/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 51 additions & 0 deletions modules/nf-core/gatk4/bedtointervallist/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ params {
tools = null // No default, must be specified

// Germlinecnvcaller options
analysis_type = 'wgs'
bin_length = 1000
mappable_regions = null
padding = 0
Expand Down
43 changes: 43 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,38 @@
"description": "Options used by the germlinecnvcaller subworkflow",
"default": "",
"properties": {
"analysis_type": {
"type": "string",
"default": "wgs",
"description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'.",
"fa_icon": "fas fa-align-center",
"enum": ["wgs", "wes"]
},
"bin_length": {
"type": "number",
"default": 1000,
"description": "Length (in bp) of the bins. If zero, no binning will be performed.",
"fa_icon": "fas fa-sort-numeric-down",
"help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. "
},
"exclude_bed": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\.bed$",
"description": "Path to directory for a bed file containing regions to be exluded from the analysis.",
"help_text": "If the regions you would like to exclude are in bed format, use this option. If you have an interval_list file, use `exclude_interval_list` parameter instead."
},
"exclude_interval_list": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\._interval_list$",
"description": "Path to directory for exclude_interval_list file.",
"help_text": "If the regions you would like to exclude are in interval_list format, use this option. If you have a bed file, use `exclude` parameter instead."
},
"mappable_regions": {
"type": "string",
"exists": true,
Expand Down Expand Up @@ -63,6 +88,24 @@
"format": "file-path",
"fa_icon": "fas fa-file",
"help_text": "Used by GATK's AnnotateIntervals."
},
"target_bed": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\.bed$",
"description": "Path to directory for target bed file.",
"help_text": "If the regions you would like to analyse are in bed format, use this option. If you have an interval_list file, use `target_interval_list` parameter instead."
},
"target_interval_list": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\._interval_list$",
"description": "Path to directory for target interval_list file.",
"help_text": "If the regions you would like to analyse are in interval_list format, use this option. If you have a bed file, use `target_bed` parameter instead."
}
}
},
Expand Down
72 changes: 56 additions & 16 deletions subworkflows/local/germlinecnvcaller_cohort.nf
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main'
include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main'
include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main'
include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main'
include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main'
include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main'
include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main'
include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main'
include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main'
include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main'
include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main'
include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main'
include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main'
include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main'
include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main'
include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main'
include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'

workflow GERMLINECNVCALLER_COHORT {
take:
ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ]
ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ]
ch_ploidy_priors // channel: [mandatory] [ path(tsv) ]
ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ]
ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ]
ch_ploidy_priors // channel: [mandatory] [ path(tsv) ]
ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ]
ch_user_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ]
ch_exclude_bed // channel: [mandatory] [ val(meta), path(bed) ]
ch_user_exclude_interval_list // channel: [mandatory] [ val(meta), path(intervals) ]

main:
ch_versions = Channel.empty()
Expand All @@ -37,10 +43,42 @@ workflow GERMLINECNVCALLER_COHORT {
.collect()
.set { ch_fai }

GATK4_BEDTOINTERVALLIST_TARGETS (ch_target_bed, ch_dict) //Runs for wes analysis, when target_bed file is provided instead of target_interval_list
GATK4_BEDTOINTERVALLIST_EXCLUDE (ch_exclude_bed, ch_dict) //Runs for wes analysis, when exclude_bed file is provided instead of exclude_interval_list

ch_user_target_interval_list
.combine(GATK4_BEDTOINTERVALLIST_TARGETS.out.interval_list.ifEmpty(null))
.branch { it ->
intervallistfrompath: it[2].equals(null)
return [it[0], it[1]]
intervallistfrombed: !(it[2].equals(null))
return [it[2], it[3]]
}
.set { ch_targets_for_mix }

ch_targets_for_mix.intervallistfrompath.mix(ch_targets_for_mix.intervallistfrombed)
.collect()
.set {ch_target_interval_list}

ch_user_exclude_interval_list
.combine(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.interval_list.ifEmpty(null))
.branch { it ->
intervallistfrompath: it[2].equals(null)
return [it[0], it[1]]
intervallistfrombed: !(it[2].equals(null))
return [it[2], it[3]]
}
.set { ch_exclude_for_mix }

ch_exclude_for_mix.intervallistfrompath.mix(ch_exclude_for_mix.intervallistfrombed)
.collect()
.set { ch_exclude_interval_list }

GATK4_PREPROCESSINTERVALS ( ch_fasta,
ch_fai,
ch_dict,
[[:],[]], [[:],[]] )
ch_target_interval_list,
ch_exclude_interval_list)

GATK4_ANNOTATEINTERVALS ( GATK4_PREPROCESSINTERVALS.out.interval_list,
ch_fasta,
Expand Down Expand Up @@ -121,6 +159,8 @@ workflow GERMLINECNVCALLER_COHORT {
ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions)
ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_TARGETS.out.versions)
ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.versions)
ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first())
ch_versions = ch_versions.mix(GATK4_ANNOTATEINTERVALS.out.versions)
ch_versions = ch_versions.mix(GATK4_FILTERINTERVALS.out.versions)
Expand Down
4 changes: 4 additions & 0 deletions tests/pipeline/germlinecnvcaller_cohort.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ nextflow_workflow {
[[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]],
[[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]])
input[4] = Channel.fromPath(params.ploidy_priors)
input[5] = Channel.value([[:],[]])
input[6] = Channel.value([[:],[]])
input[7] = Channel.value([[:],[]])
input[8] = Channel.value([[:],[]])
"""
}
}
Expand Down
Loading
Loading