diff --git a/CHANGELOG.md b/CHANGELOG.md index 0142dd3..b6d1aee 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add GRIDSS2 assembly - Add GRIDSS2 preprocessing - Add supported Nextflow version to `README.md` - Add PlantUML diagram diff --git a/config/F16.config b/config/F16.config index b762576..7f5f57d 100644 --- a/config/F16.config +++ b/config/F16.config @@ -15,6 +15,17 @@ process { } } + withName: run_assembly_GRIDSS { /* Resources for the GRIDSS assembly process in this config; retry_strategy presumably grows memory by the given operand on retry (confirm against the retry-handling code) */ + cpus = 8 + memory = 15.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 16.GB diff --git a/config/F32.config b/config/F32.config index b4cb777..27604ab 100644 --- a/config/F32.config +++ b/config/F32.config @@ -15,6 +15,17 @@ process { } } + withName: run_assembly_GRIDSS { /* Resources for the GRIDSS assembly process in this config; retry_strategy presumably grows memory by the given operand on retry (confirm against the retry-handling code) */ + cpus = 8 + memory = 25.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 30.GB diff --git a/config/F72.config b/config/F72.config index 774614f..6f7835c 100644 --- a/config/F72.config +++ b/config/F72.config @@ -15,6 +15,17 @@ process { } } + withName: run_assembly_GRIDSS { /* Resources for the GRIDSS assembly process in this config; retry_strategy presumably grows memory by the given operand on retry (confirm against the retry-handling code) */ + cpus = 8 + memory = 35.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 30.GB diff --git a/config/M64.config b/config/M64.config index 166eb7b..29e2494 100644 --- a/config/M64.config +++ b/config/M64.config @@ -15,6 +15,17 @@ process { } } + withName: run_assembly_GRIDSS { /* Resources for the GRIDSS assembly process in this config; retry_strategy presumably grows memory by the given operand on retry (confirm against the retry-handling code) */ + cpus = 8 + memory = 120.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 120.GB diff --git a/config/schema.yaml b/config/schema.yaml index 176a1cf..e84434e 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -13,6 +13,11 @@ gridss_reference_fasta: mode: 'r' required: true 
help: 'Absolute path to GRIDSS2 reference FASTA file' +gridss_blacklist: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to GRIDSS2 blacklist BED file' exclusion_file: type: 'Path' mode: 'r' diff --git a/config/template.config b/config/template.config index dcc673c..a58bcf3 100755 --- a/config/template.config +++ b/config/template.config @@ -12,6 +12,9 @@ params { blcds_registered_dataset = false reference_fasta = "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta" + // GRCh37 blacklist - /hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh37-EBI-hs37d5/ENCFF001TDO.bed + // GRCh38 blacklist - /hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed + gridss_blacklist = "/hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed" gridss_reference_fasta = "/hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta" exclusion_file = "/hot/ref/tool-specific-input/Delly/hg38/human.hg38.excl.tsv" diff --git a/main.nf b/main.nf index ed5a52f..b118c84 100644 --- a/main.nf +++ b/main.nf @@ -55,7 +55,7 @@ include { call_sSV_Delly; filter_sSV_Delly } from './module/delly' addParams( include { call_sSV_Manta } from './module/manta' addParams( workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}" ) -include { preprocess_BAM_GRIDSS } from './module/gridss' addParams( +include { preprocess_BAM_GRIDSS; run_assembly_GRIDSS } from './module/gridss' addParams( workflow_output_dir: "${params.output_dir_base}/GRIDSS-${params.gridss_version}" ) include { generate_sha512 as generate_sha512_BCFtools } from './module/sha512' addParams( @@ -204,6 +204,19 @@ workflow { params.gridss_reference_fasta, gridss_reference_files ) + gridss_preprocess_dir = preprocess_BAM_GRIDSS.out.gridss_preprocess + .flatten() + .map { parentdir -> parentdir.getParent() } + .unique() + .collect() /* gridss_preprocess_dir: flatten the preprocess_BAM_GRIDSS outputs, map each path to its parent via getParent(), drop duplicates, and collect the result into a single list that is passed as the second argument of run_assembly_GRIDSS */ + + run_assembly_GRIDSS( + input_paired_bams_ch, + gridss_preprocess_dir, 
params.gridss_reference_fasta, + gridss_reference_files, + params.gridss_blacklist + ) } } diff --git a/module/gridss.nf b/module/gridss.nf index 36e1cdd..a79b935 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -56,3 +56,65 @@ process preprocess_BAM_GRIDSS { ${sample_bam} """ } + +process run_assembly_GRIDSS { /* Runs the GRIDSS "assemble" step (gridss -s assemble) for one tumor/normal BAM pair; the normal BAM is listed before the tumor BAM on the command line. Inputs: a tuple of tumor_id, tumor BAM and BAI, normal BAM and BAI; gridss_preprocess_dir (never referenced by name in the script, so presumably staged only so GRIDSS can find the preprocess outputs; confirm); the reference FASTA and its companion files; and the blacklist passed via -b. Outputs: the assembly BAM named after tumor_id (emitted as gridss_assembly_bam), the contents of its .gridss.working directory, and the .command.* files. Memory: a fixed 4 GB is passed as --otherjvmheap and task.memory minus that 4 GB as --jvmheap, both expressed in whole gigabytes. output_filename is assigned in the script section and read by the publishDir saveAs closures to rename published files; the third publishDir copies the .command.* files to the log directory with a "log" prefix. */ + container params.docker_image_gridss + + publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + pattern: "${tumor_id}.assembly.bam", + mode: "copy", + saveAs: { + "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + pattern: "${tumor_id}.assembly.bam.gridss.working/*", + mode: "copy", + saveAs: { + "${output_filename}.assembly.bam.gridss.working/${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir "${params.log_output_dir}/process-log", + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + + input: + tuple(val(tumor_id), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai)) + path(gridss_preprocess_dir) + path(gridss_reference_fasta) + path(gridss_reference_files) + path(gridss_blacklist) + + output: + path "${tumor_id}.assembly.bam", emit: gridss_assembly_bam + path "${tumor_id}.assembly.bam.gridss.working/*" + path ".command.*" + + script: + otherjvmheap = 4.GB + gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" + gridss_jvmheap = "${(task.memory - otherjvmheap).toGiga()}g" + gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" + output_filename = generate_standard_filename( + "GRIDSS2-${params.gridss_version}", + params.dataset_id, + tumor_id, + [:] + ) + + """ + set -euo pipefail + gridss \ + -r ${gridss_reference_fasta} \ + -j ${gridss_jar} \ + -s assemble \ + -t ${task.cpus} \ + --jvmheap ${gridss_jvmheap} \ 
--otherjvmheap ${gridss_otherjvmheap} \ + -b ${gridss_blacklist} \ + -a ${tumor_id}.assembly.bam \ + ${normal_bam} \ + ${tumor_bam} + """ + }