Skip to content

Commit

Permalink
Add GRIDSS Assembly (#179)
Browse files Browse the repository at this point in the history
* update gridss related changes

* update gridss preprocess into configs and add assembly process

* update gridss preprocess and add assembly

* update preprocess in main.nf and add assembly

* fix emitting of preprocess to assembly

* fix intermediate publish dir

* update memory for assembly

* Update CHANGELOG.md

* uncomment pipeval

* parse preprocess dir paths and use as input to assembly

* remove redundant view()

* add otherjvmheap parameter to assembly and adjust jvmheap

* optimize F32 memory for assembly

* optimize F72 memory for assembly

* optimize F72 CPU for assembly

* set otherjvmheap value to 4GB and subtract it from task.memory to set jvmheap

* remove redundant collect()

* publish assembly BAM into intermediate dir

---------

Co-authored-by: Mootor <mmootor@ip-0A125937.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
  • Loading branch information
Faizal-Eeman and Mootor authored Oct 17, 2024
1 parent 253dd16 commit 9690887
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]
### Added
- Add GRIDSS2 assembly
- Add GRIDSS2 preprocessing
- Add supported Nextflow version to `README.md`
- Add PlantUML diagram
Expand Down
11 changes: 11 additions & 0 deletions config/F16.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ process {
}
}

// Resource request for the GRIDSS assembly process in this profile: 8 CPUs and 15 GB of memory.
withName: run_assembly_GRIDSS {
cpus = 8
memory = 15.GB
// NOTE(review): presumably the memory request is scaled exponentially (operand 2)
// when the task is retried — confirm against the pipeline's retry_strategy handler.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 16.GB
Expand Down
11 changes: 11 additions & 0 deletions config/F32.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ process {
}
}

// Resource request for the GRIDSS assembly process in this profile: 8 CPUs and 25 GB of memory.
withName: run_assembly_GRIDSS {
cpus = 8
memory = 25.GB
// NOTE(review): presumably the memory request is scaled exponentially (operand 2)
// when the task is retried — confirm against the pipeline's retry_strategy handler.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 30.GB
Expand Down
11 changes: 11 additions & 0 deletions config/F72.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ process {
}
}

// Resource request for the GRIDSS assembly process in this profile: 8 CPUs and 35 GB of memory.
withName: run_assembly_GRIDSS {
cpus = 8
memory = 35.GB
// NOTE(review): presumably the memory request is scaled exponentially (operand 2)
// when the task is retried — confirm against the pipeline's retry_strategy handler.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 30.GB
Expand Down
11 changes: 11 additions & 0 deletions config/M64.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ process {
}
}

// Resource request for the GRIDSS assembly process in this profile: 8 CPUs and 120 GB of memory.
withName: run_assembly_GRIDSS {
cpus = 8
memory = 120.GB
// NOTE(review): presumably the memory request is scaled exponentially (operand 2)
// when the task is retried — confirm against the pipeline's retry_strategy handler.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 120.GB
Expand Down
5 changes: 5 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ gridss_reference_fasta:
mode: 'r'
required: true
help: 'Absolute path to GRIDSS2 reference FASTA file'
# Required, readable path parameter; main.nf passes params.gridss_blacklist to
# run_assembly_GRIDSS, which hands it to GRIDSS via the `-b` option.
gridss_blacklist:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to GRIDSS2 blacklist BED file'
exclusion_file:
type: 'Path'
mode: 'r'
Expand Down
3 changes: 3 additions & 0 deletions config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ params {
blcds_registered_dataset = false

reference_fasta = "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"
// GRCh37 blacklist - /hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh37-EBI-hs37d5/ENCFF001TDO.bed
// GRCh38 blacklist - /hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed
gridss_blacklist = "/hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed"
gridss_reference_fasta = "/hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"

exclusion_file = "/hot/ref/tool-specific-input/Delly/hg38/human.hg38.excl.tsv"
Expand Down
15 changes: 14 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ include { call_sSV_Delly; filter_sSV_Delly } from './module/delly' addParams(
include { call_sSV_Manta } from './module/manta' addParams(
workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}"
)
include { preprocess_BAM_GRIDSS } from './module/gridss' addParams(
include { preprocess_BAM_GRIDSS; run_assembly_GRIDSS } from './module/gridss' addParams(
workflow_output_dir: "${params.output_dir_base}/GRIDSS-${params.gridss_version}"
)
include { generate_sha512 as generate_sha512_BCFtools } from './module/sha512' addParams(
Expand Down Expand Up @@ -204,6 +204,19 @@ workflow {
params.gridss_reference_fasta,
gridss_reference_files
)
gridss_preprocess_dir = preprocess_BAM_GRIDSS.out.gridss_preprocess
.flatten()
.map { parentdir -> parentdir.getParent() }
.unique()
.collect()

run_assembly_GRIDSS(
input_paired_bams_ch,
gridss_preprocess_dir,
params.gridss_reference_fasta,
gridss_reference_files,
params.gridss_blacklist
)
}

}
62 changes: 62 additions & 0 deletions module/gridss.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,65 @@ process preprocess_BAM_GRIDSS {
${sample_bam}
"""
}

/*
 * Run the GRIDSS2 `assemble` step on one tumor/normal BAM pair.
 *
 * Inputs:
 *   - tuple of (tumor_id, tumor BAM, tumor BAI, normal BAM, normal BAI)
 *   - gridss_preprocess_dir: path(s) staged into the task directory; not
 *     referenced on the command line.
 *     NOTE(review): presumably GRIDSS discovers the preprocess working
 *     directories next to the inputs — confirm.
 *   - gridss_reference_fasta: reference FASTA passed to `-r`
 *   - gridss_reference_files: staged alongside the FASTA; not referenced on
 *     the command line (presumably index files — confirm)
 *   - gridss_blacklist: BED file passed to `-b`
 *
 * Outputs:
 *   - `${tumor_id}.assembly.bam` (emitted as `gridss_assembly_bam`)
 *   - every file under `${tumor_id}.assembly.bam.gridss.working/`
 *   - the `.command.*` task files
 */
process run_assembly_GRIDSS {
    container params.docker_image_gridss

    // Assembly BAM -> intermediate dir, renamed with the standardized filename prefix.
    publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}",
        pattern: "${tumor_id}.assembly.bam",
        mode: "copy",
        saveAs: {
            "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}"
            }

    // Files from the GRIDSS working directory -> a matching, renamed sub-directory.
    publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}",
        pattern: "${tumor_id}.assembly.bam.gridss.working/*",
        mode: "copy",
        saveAs: {
            "${output_filename}.assembly.bam.gridss.working/${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}"
            }

    // Task files (.command.*) -> per-process log directory, each prefixed with "log".
    publishDir "${params.log_output_dir}/process-log",
        pattern: ".command.*",
        mode: "copy",
        saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" }

    input:
        tuple(val(tumor_id), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai))
        path(gridss_preprocess_dir)
        path(gridss_reference_fasta)
        path(gridss_reference_files)
        path(gridss_blacklist)

    output:
        path "${tumor_id}.assembly.bam", emit: gridss_assembly_bam
        path "${tumor_id}.assembly.bam.gridss.working/*"
        path ".command.*"

    script:
        // Split the task memory: a fixed 4 GB goes to --otherjvmheap and the
        // remainder to --jvmheap, both rendered as whole gigabytes ("<N>g").
        def reserved_heap = 4.GB
        def assembly_heap = task.memory - reserved_heap
        def other_heap_arg = "${reserved_heap.toGiga()}g"
        def main_heap_arg = "${assembly_heap.toGiga()}g"
        def jar_path = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar"

        // Deliberately declared without `def`: the publishDir saveAs closures
        // above reference output_filename.
        output_filename = generate_standard_filename(
            "GRIDSS2-${params.gridss_version}",
            params.dataset_id,
            tumor_id,
            [:]
            )

        """
        set -euo pipefail
        gridss \
        -r ${gridss_reference_fasta} \
        -j ${jar_path} \
        -s assemble \
        -t ${task.cpus} \
        --jvmheap ${main_heap_arg} \
        --otherjvmheap ${other_heap_arg} \
        -b ${gridss_blacklist} \
        -a ${tumor_id}.assembly.bam \
        ${normal_bam} \
        ${tumor_bam}
        """
}

0 comments on commit 9690887

Please sign in to comment.