From bc487eab8cfaef32ce89f7e9941f1a37edb7f151 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 27 Sep 2024 09:57:33 -0700 Subject: [PATCH 01/18] update gridss related changes --- CHANGELOG.md | 5 ++--- config/default.config | 2 ++ metadata.yaml | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3782c80..0142dd3 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,15 +9,14 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add GRIDSS2 preprocessing - Add supported Nextflow version to `README.md` +- Add PlantUML diagram ### Changed - Update PlantUML action to `v1.0.1` - Update memory allocations in `M64.config` -### Added -- Add PlantUML diagram - --- ## [6.1.0] - 2024-03-12 diff --git a/config/default.config b/config/default.config index 729901a..c1496d9 100644 --- a/config/default.config +++ b/config/default.config @@ -27,12 +27,14 @@ params { // Pipeline tool versions delly_version = '1.2.6' manta_version = '1.6.0' + gridss_version = '2.13.2' bcftools_version = '1.15.1' pipeval_version = '4.0.0-rc.2' // Docker tool versions docker_image_delly = "${-> params.docker_container_registry}/delly:${params.delly_version}" docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" + docker_image_gridss = "${-> params.docker_container_registry}/gridss:${params.gridss_version}" docker_image_bcftools = "${-> params.docker_container_registry}/bcftools:${params.bcftools_version}" docker_image_validate = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" } diff --git a/metadata.yaml b/metadata.yaml index 82f057a..4a6e8f0 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -1,8 +1,8 @@ --- category: "pipeline" -description: "Nextflow pipeline to call somatic structural variants using DELLY and Manta" +description: "Nextflow pipeline to call somatic structural variants using DELLY, Manta and GRIDSS2" maintainers: 
"Boutros Lab Infrastructure " languages: ["Nextflow", "Docker"] dependencies: ["Java", "Nextflow", "Docker"] references: "https://uclahs.box.com/s/qfzr99sc8ntmfddn30ii62wx4273utoz" -tools: ["Delly:v1.2.6", "Manta:v1.6.0", "BCFtools:v1.15.1", "PipeVal:v4.0.0-rc.2"] +tools: ["Delly:v1.2.6", "Manta:v1.6.0", "GRIDSS2:v2.13.2", "BCFtools:v1.15.1", "PipeVal:v4.0.0-rc.2"] From 86d3794ec91f2bdcf1f6a39619d3ae3ad913accd Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 27 Sep 2024 09:58:13 -0700 Subject: [PATCH 02/18] update gridss preprocess into configs and add assembly process --- config/F16.config | 24 +++++++++++++++++++++++- config/F32.config | 13 ++++++++++++- config/schema.yaml | 12 ++++++++++++ config/template.config | 6 +++++- 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/config/F16.config b/config/F16.config index 98a130b..a0fdc1f 100644 --- a/config/F16.config +++ b/config/F16.config @@ -4,6 +4,28 @@ process { memory = 1.GB } + withName: preprocess_BAM_GRIDSS { + cpus = 4 + memory = 10.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + + withName: run_assembly_GRIDSS { + cpus = 8 + memory = 30.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 16.GB @@ -36,7 +58,7 @@ process { } } } - + withName: call_sSV_Manta { cpus = 1 memory = 16.GB diff --git a/config/F32.config b/config/F32.config index a0a4547..a42bfcc 100644 --- a/config/F32.config +++ b/config/F32.config @@ -4,6 +4,17 @@ process { memory = 1.GB } + withName: run_assembly_GRIDSS { + cpus = 8 + memory = 40.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 30.GB @@ -36,7 +47,7 @@ process { } } } - + withName: call_sSV_Manta { cpus = 1 memory = 30.GB diff --git a/config/schema.yaml b/config/schema.yaml index 63cfac5..121adb6 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -8,6 +8,16 
@@ reference_fasta: mode: 'r' required: true help: 'Absolute path to a reference FASTA file' +gridss_reference_dir: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to GRIDSS2 reference dir' +gridss_blacklist: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to GRIDSS2 blacklist BED file' exclusion_file: type: 'Path' mode: 'r' @@ -20,9 +30,11 @@ algorithm: default: - delly - manta + - gridss2 choices: - delly - manta + - gridss2 output_dir: type: 'Path' mode: 'w' diff --git a/config/template.config b/config/template.config index 2e9638b..6b660cc 100755 --- a/config/template.config +++ b/config/template.config @@ -12,13 +12,17 @@ params { blcds_registered_dataset = false reference_fasta = "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta" + gridss_reference_dir = "/hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/" + // GRCh37 blacklist - /hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh37-EBI-hs37d5/ENCFF001TDO.bed + // GRCh38 blacklist - /hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed + gridss_blacklist = "/hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed" exclusion_file = "/hot/ref/tool-specific-input/Delly/hg38/human.hg38.excl.tsv" output_dir = "where/to/save/outputs/" // select the tool(s) to run - algorithm = [] // algorithm = ['delly', 'manta'] + algorithm = [] // algorithm = ['delly', 'manta', 'gridss2'] save_intermediate_files = false From d04245e2c0d91d1e42216338daa75f8ebedf5f4a Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 27 Sep 2024 09:58:44 -0700 Subject: [PATCH 03/18] update gridss preprocess a and add assembly --- module/gridss.nf | 118 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 module/gridss.nf diff --git a/module/gridss.nf b/module/gridss.nf new file mode 100644 index 0000000..6863613 --- /dev/null +++ b/module/gridss.nf @@ -0,0 +1,118 @@ +#!/usr/bin/env nextflow + 
+log.info """\ +------------------------------------ + G R I D S S 2 +------------------------------------ +Docker Images: +- docker_image_gridss: ${params.docker_image_gridss} +""" + +include { generate_standard_filename; sanitize_string } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' + +process preprocess_BAM_GRIDSS { + container params.docker_image_gridss + + publishDir "${params.workflow_output_dir}/intermediate", + pattern: "${bam_name}.gridss.working/*", + mode: "copy", + saveAs: { + "${output_filename}.${sanitize_string(file(it).getName().replace("${bam_name}.", ""))}" + } + + publishDir "${params.log_output_dir}/process-log", + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + + input: + tuple(val(sample_id), path(sample_bam), path(sample_index)) + path(gridss_reference_fasta) + path(gridss_reference_files) + + output: + path "${bam_name}.gridss.working/*", emit: gridss_preprocess + path ".command.*" + + script: + gridss_mem = "${task.memory.toGiga()}g" + gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" + bam_name = file(sample_bam).getName() + output_filename = generate_standard_filename( + "GRIDSS2-${params.gridss_version}", + params.dataset_id, + sample_id, + [:] + ) + + """ + set -euo pipefail + gridss \ + -r ${gridss_reference_fasta} \ + -j ${gridss_jar} \ + -s preprocess \ + -t ${task.cpus} \ + --jvmheap ${gridss_mem} \ + ${sample_bam} + """ + } + +process run_assembly_GRIDSS { + container params.docker_image_gridss + + publishDir "${params.workflow_output_dir}/output", + pattern: "${tumor_id}.assembly.bam", + mode: "copy", + saveAs: { + "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir "${params.workflow_output_dir}/intermediate", + pattern: "${tumor_id}.assembly.bam.gridss.working/*", + mode: "copy", + saveAs: { + 
"${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir "${params.log_output_dir}/process-log", + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + + input: + tuple(val(tumor_id), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai)) + path(gridss_reference_fasta) + path(gridss_reference_files) + path(gridss_blacklist) + + output: + path "${tumor_id}.assembly.bam", emit: gridss_assembly_bam + path "${tumor_id}.assembly.bam.gridss.working/*" + path ".command.*" + + script: + gridss_command_mem_diff = 2.GB + gridss_mem = "${(task.memory - gridss_command_mem_diff).toGiga()}g" + //gridss_mem = "${task.memory.toGiga()}g" + gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" + output_filename = generate_standard_filename( + "GRIDSS2-${params.gridss_version}", + params.dataset_id, + tumor_id, + [:] + ) + //--jvmheap ${gridss_mem} \ + """ + set -euo pipefail + gridss \ + -r ${gridss_reference_fasta} \ + -j ${gridss_jar} \ + -s assemble \ + -t ${task.cpus} \ + --jvmheap ${gridss_mem} \ + -b ${gridss_blacklist} \ + -a ${tumor_id}.assembly.bam \ + ${normal_bam} \ + ${tumor_bam} + """ + } From 868590d0db97b3b9b627247392ea15b4a8d7b1c7 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 27 Sep 2024 09:59:32 -0700 Subject: [PATCH 04/18] update preprocess coin main.nf and add assembly --- main.nf | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 57654f5..83de408 100644 --- a/main.nf +++ b/main.nf @@ -34,6 +34,7 @@ Current Configuration: DELLY: ${params.delly_version} BCFtools: ${params.bcftools_version} Manta: ${params.manta_version} + GRIDSS2: ${params.gridss_version} PipeVal: ${params.pipeval_version} ------------------------------------ @@ -54,6 +55,9 @@ include { call_sSV_Delly; filter_sSV_Delly } from './module/delly' addParams( include { 
call_sSV_Manta } from './module/manta' addParams( workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}" ) +include { run_assembly_GRIDSS } from './module/gridss' addParams( + workflow_output_dir: "${params.output_dir_base}/GRIDSS-${params.gridss_version}" + ) include { generate_sha512 as generate_sha512_BCFtools } from './module/sha512' addParams( workflow_output_dir: "${params.output_dir_base}/DELLY-${params.delly_version}" ) @@ -74,10 +78,15 @@ Channel.from(params.samples_to_process) .map{ sample -> ['index': indexFile(sample.path)] + sample } .set{ input_ch_samples_with_index } +Channel.from(params.samples_to_process) + .map{ sample -> [sample.id, sample.path, indexFile(sample.path)] } + .set{ gridss_ch } + input_ch_samples_with_index .map{ sample -> [sample.path, sample.index] } .flatten() .set{ input_validation } + if (params.verbose){ input_validation.view() } @@ -96,6 +105,11 @@ if (params.verbose){ reference_fasta_index = "${params.reference_fasta}.fai" +// Collect GRIDSS reference files +gridss_reference = Channel.fromPath( "${params.gridss_reference_dir}/*", checkIfExists: true ) +gridss_reference_fasta = gridss_reference.filter { it.name.endsWith(".fasta") }.collect() +gridss_reference_files = gridss_reference.filter { !it.name.endsWith(".fasta") && !it.name.endsWith(".bed") }.collect() + workflow { /** * Validate the input bams @@ -106,7 +120,6 @@ workflow { name: 'input_validation.txt', storeDir: "${params.output_dir_base}/validation/run_validate_PipeVal" ) - /** * Call "delly call -x hg19.excl -o t1.bcf -g hg19.fa tumor1.bam normal1.bam" per paired (tumor sample, normal sample) * The sv are stored in call_sSV_Delly.out.nt_call_bcf @@ -158,7 +171,6 @@ workflow { call_sSV_Delly.out.nt_call_bcf_csi, call_sSV_Delly.out.tumor_id ) - /** * Filter the output bcf from filter_sSV_Delly. * The default filter_condition is "FILTER=='PASS'", which filters out NonPass calls. 
@@ -168,7 +180,6 @@ workflow { params.filter_condition, call_sSV_Delly.out.tumor_id ) - /** * Generate one sha512 checksum for DELLY's output files. */ @@ -189,4 +200,18 @@ workflow { call_sSV_Manta.out.manta_vcfs.flatten() ) } + if ('gridss2' in params.algorithm) { + preprocess_BAM_GRIDSS( + gridss_ch, + gridss_reference_fasta, + gridss_reference_files + ) + run_assembly_GRIDSS( + input_paired_bams_ch, + gridss_reference_fasta, + gridss_reference_files, + params.gridss_blacklist + ) + } + } From 4b897444222da6a4e96db570bfe7cd7e3eedd791 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 4 Oct 2024 15:32:55 -0700 Subject: [PATCH 05/18] fix emitting of preprocess to assembly --- main.nf | 15 ++++++++------- module/gridss.nf | 9 ++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index f810f07..f55eca0 100644 --- a/main.nf +++ b/main.nf @@ -112,12 +112,12 @@ workflow { /** * Validate the input bams */ - run_validate_PipeVal(input_validation) + // run_validate_PipeVal(input_validation) // Collect and store input validation output - run_validate_PipeVal.out.validation_result.collectFile( - name: 'input_validation.txt', - storeDir: "${params.output_dir_base}/validation/run_validate_PipeVal" - ) + // run_validate_PipeVal.out.validation_result.collectFile( + // name: 'input_validation.txt', + // storeDir: "${params.output_dir_base}/validation/run_validate_PipeVal" + // ) /** * Call "delly call -x hg19.excl -o t1.bcf -g hg19.fa tumor1.bam normal1.bam" per paired (tumor sample, normal sample) * The sv are stored in call_sSV_Delly.out.nt_call_bcf @@ -201,12 +201,13 @@ workflow { if ('gridss2' in params.algorithm) { preprocess_BAM_GRIDSS( gridss_ch, - gridss_reference_fasta, + params.gridss_reference_fasta, gridss_reference_files ) run_assembly_GRIDSS( input_paired_bams_ch, - gridss_reference_fasta, + preprocess_BAM_GRIDSS.out.gridss_preprocess.collect(), + params.gridss_reference_fasta, gridss_reference_files, params.gridss_blacklist ) 
diff --git a/module/gridss.nf b/module/gridss.nf index 6372d71..4def2fa 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -31,7 +31,7 @@ process preprocess_BAM_GRIDSS { path(gridss_reference_files) output: - path "${bam_name}.gridss.working/*", emit: gridss_preprocess + path "${bam_name}.gridss.working", emit: gridss_preprocess path ".command.*" script: @@ -81,6 +81,7 @@ process run_assembly_GRIDSS { input: tuple(val(tumor_id), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai)) + path(gridss_preprocess_files) path(gridss_reference_fasta) path(gridss_reference_files) path(gridss_blacklist) @@ -91,9 +92,7 @@ process run_assembly_GRIDSS { path ".command.*" script: - gridss_command_mem_diff = 2.GB - gridss_mem = "${(task.memory - gridss_command_mem_diff).toGiga()}g" - //gridss_mem = "${task.memory.toGiga()}g" + gridss_mem = "${task.memory.toGiga()}g" gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" output_filename = generate_standard_filename( "GRIDSS2-${params.gridss_version}", @@ -101,7 +100,7 @@ process run_assembly_GRIDSS { tumor_id, [:] ) - //--jvmheap ${gridss_mem} \ + """ set -euo pipefail gridss \ From 0ffbdd4337b54c0e57f1fdcadd067d40a91f22a0 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 4 Oct 2024 15:48:42 -0700 Subject: [PATCH 06/18] fix intermediate publish dir --- module/gridss.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/gridss.nf b/module/gridss.nf index 4def2fa..1b11cdc 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -67,7 +67,7 @@ process run_assembly_GRIDSS { "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" } - publishDir "${params.workflow_output_dir}/intermediate", + publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", pattern: "${tumor_id}.assembly.bam.gridss.working/*", mode: "copy", saveAs: { "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" } From eb08dd35edf560c6657425648dc1b2cd7f0fef5b Mon Sep 17 00:00:00 2001 From: Mootor 
Date: Fri, 4 Oct 2024 15:51:11 -0700 Subject: [PATCH 07/18] update memory for assembly --- config/F16.config | 2 +- config/F72.config | 2 +- config/M64.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/F16.config b/config/F16.config index 394e6f4..7f5f57d 100644 --- a/config/F16.config +++ b/config/F16.config @@ -17,7 +17,7 @@ process { withName: run_assembly_GRIDSS { cpus = 8 - memory = 20.GB + memory = 15.GB retry_strategy { memory { strategy = 'exponential' diff --git a/config/F72.config b/config/F72.config index 4d5e703..1f81130 100644 --- a/config/F72.config +++ b/config/F72.config @@ -17,7 +17,7 @@ process { withName: run_assembly_GRIDSS { cpus = 8 - memory = 20.GB + memory = 30.GB retry_strategy { memory { strategy = 'exponential' diff --git a/config/M64.config b/config/M64.config index e0f885c..29e2494 100644 --- a/config/M64.config +++ b/config/M64.config @@ -17,7 +17,7 @@ process { withName: run_assembly_GRIDSS { cpus = 8 - memory = 20.GB + memory = 120.GB retry_strategy { memory { strategy = 'exponential' From 8fe2597e8c7fccc9d99a3a13ff2068365e56510a Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 4 Oct 2024 15:51:42 -0700 Subject: [PATCH 08/18] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0142dd3..b6d1aee 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add GRIDSS2 assembly - Add GRIDSS2 preprocessing - Add supported Nextflow version to `README.md` - Add PlantUML diagram From c37a5b3f2fff2eb5258de414a188379f881d3791 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 4 Oct 2024 18:21:48 -0700 Subject: [PATCH 09/18] uncomment pipeval --- main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index f55eca0..888ee8c 100644 --- a/main.nf +++ b/main.nf @@ -112,12 +112,12 @@ workflow { 
/** * Validate the input bams */ - // run_validate_PipeVal(input_validation) + run_validate_PipeVal(input_validation) // Collect and store input validation output - // run_validate_PipeVal.out.validation_result.collectFile( - // name: 'input_validation.txt', - // storeDir: "${params.output_dir_base}/validation/run_validate_PipeVal" - // ) + run_validate_PipeVal.out.validation_result.collectFile( + name: 'input_validation.txt', + storeDir: "${params.output_dir_base}/validation/run_validate_PipeVal" + ) /** * Call "delly call -x hg19.excl -o t1.bcf -g hg19.fa tumor1.bam normal1.bam" per paired (tumor sample, normal sample) * The sv are stored in call_sSV_Delly.out.nt_call_bcf From 5f574446ea3f6761c045c9c52d4409297ffe5f95 Mon Sep 17 00:00:00 2001 From: Mootor Date: Mon, 7 Oct 2024 11:42:17 -0700 Subject: [PATCH 10/18] parse preprocess dir paths and use as input to assembly --- main.nf | 9 ++++++++- module/gridss.nf | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 888ee8c..01d8358 100644 --- a/main.nf +++ b/main.nf @@ -204,9 +204,16 @@ workflow { params.gridss_reference_fasta, gridss_reference_files ) + gridss_preprocess_dir = preprocess_BAM_GRIDSS.out.gridss_preprocess + .collect() + .flatten() + .map { parentdir -> parentdir.getParent() } + .unique() + .collect() + gridss_preprocess_dir.view() run_assembly_GRIDSS( input_paired_bams_ch, - preprocess_BAM_GRIDSS.out.gridss_preprocess.collect(), + gridss_preprocess_dir, params.gridss_reference_fasta, gridss_reference_files, params.gridss_blacklist diff --git a/module/gridss.nf b/module/gridss.nf index 1b11cdc..8990f65 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -31,7 +31,7 @@ process preprocess_BAM_GRIDSS { path(gridss_reference_files) output: - path "${bam_name}.gridss.working", emit: gridss_preprocess + path "${bam_name}.gridss.working/*", emit: gridss_preprocess path ".command.*" script: @@ -81,7 +81,7 @@ process run_assembly_GRIDSS { input: 
tuple(val(tumor_id), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai)) - path(gridss_preprocess_files) + path(gridss_preprocess_dir) path(gridss_reference_fasta) path(gridss_reference_files) path(gridss_blacklist) From 37fe3123529e9e870d3bebce9e6a5c63e2d99e99 Mon Sep 17 00:00:00 2001 From: Mootor Date: Mon, 7 Oct 2024 14:35:52 -0700 Subject: [PATCH 11/18] remove redundant view() --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 01d8358..ec84b05 100644 --- a/main.nf +++ b/main.nf @@ -210,7 +210,7 @@ workflow { .map { parentdir -> parentdir.getParent() } .unique() .collect() - gridss_preprocess_dir.view() + run_assembly_GRIDSS( input_paired_bams_ch, gridss_preprocess_dir, From 468fe039d1cbed09438891587ed8095eb4efc618 Mon Sep 17 00:00:00 2001 From: Mootor Date: Thu, 10 Oct 2024 14:33:18 -0700 Subject: [PATCH 12/18] add otherjvmheap parameter to assembly and adjust jvmheap --- module/gridss.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/module/gridss.nf b/module/gridss.nf index 8990f65..c63b0cf 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -92,7 +92,9 @@ process run_assembly_GRIDSS { path ".command.*" script: - gridss_mem = "${task.memory.toGiga()}g" + otherjvmheap = 4.GB + gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" + gridss_jvmheap = "${(task.memory - gridss_otherjvmheap).toGiga()}g" gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" output_filename = generate_standard_filename( "GRIDSS2-${params.gridss_version}", @@ -108,7 +110,8 @@ process run_assembly_GRIDSS { -j ${gridss_jar} \ -s assemble \ -t ${task.cpus} \ - --jvmheap ${gridss_mem} \ + --jvmheap ${gridss_jvmheap} \ + --otherjvmheap ${gridss_otherjvmheap} \ -b ${gridss_blacklist} \ -a ${tumor_id}.assembly.bam \ ${normal_bam} \ From 29c2b5abbcdcba5f86ca56c17eed4f34da043dfe Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 11 Oct 2024 11:44:14 -0700 Subject: [PATCH 13/18] 
optimize F32 memory for assembly --- config/F32.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/F32.config b/config/F32.config index e33b087..27604ab 100644 --- a/config/F32.config +++ b/config/F32.config @@ -17,7 +17,7 @@ process { withName: run_assembly_GRIDSS { cpus = 8 - memory = 20.GB + memory = 25.GB retry_strategy { memory { strategy = 'exponential' From f52dd639360233a7f176e3a52924636a17213822 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 11 Oct 2024 11:44:58 -0700 Subject: [PATCH 14/18] optimize F72 memory for assembly --- config/F72.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/F72.config b/config/F72.config index 1f81130..8760c6b 100644 --- a/config/F72.config +++ b/config/F72.config @@ -16,8 +16,8 @@ process { } withName: run_assembly_GRIDSS { - cpus = 8 - memory = 30.GB + cpus = 1 + memory = 35.GB retry_strategy { memory { strategy = 'exponential' From 58bd048a6e84c4a553fde5220e2008aafc3cbd8a Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 11 Oct 2024 11:45:22 -0700 Subject: [PATCH 15/18] optimize F72 CPU for assembly --- config/F72.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/F72.config b/config/F72.config index 8760c6b..6f7835c 100644 --- a/config/F72.config +++ b/config/F72.config @@ -16,7 +16,7 @@ process { } withName: run_assembly_GRIDSS { - cpus = 1 + cpus = 8 memory = 35.GB retry_strategy { memory { From 630b1e1dda3a74a5b6dcd0ea11c0b88f390f5358 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 11 Oct 2024 11:48:47 -0700 Subject: [PATCH 16/18] set otherjvmheap value to 4GB and subtract it from task.memory to set jvmheap --- module/gridss.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/gridss.nf b/module/gridss.nf index c63b0cf..18cf9b5 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -94,7 +94,7 @@ process run_assembly_GRIDSS { script: otherjvmheap = 4.GB gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" - 
gridss_jvmheap = "${(task.memory - gridss_otherjvmheap).toGiga()}g" + gridss_jvmheap = "${(task.memory - otherjvmheap).toGiga()}g" gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" output_filename = generate_standard_filename( "GRIDSS2-${params.gridss_version}", From 817cec5a858906cf49f81ba7eb4b3c564298e1b3 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 11 Oct 2024 12:10:51 -0700 Subject: [PATCH 17/18] remove redundant collect() --- main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.nf b/main.nf index ec84b05..b118c84 100644 --- a/main.nf +++ b/main.nf @@ -205,7 +205,6 @@ workflow { gridss_reference_files ) gridss_preprocess_dir = preprocess_BAM_GRIDSS.out.gridss_preprocess - .collect() .flatten() .map { parentdir -> parentdir.getParent() } .unique() From ad632fd7bfbc0ff6b3b0cf136d337fb6831016e1 Mon Sep 17 00:00:00 2001 From: Mootor Date: Wed, 16 Oct 2024 19:04:47 -0700 Subject: [PATCH 18/18] publish assembly BAM into intermediate dir --- module/gridss.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/gridss.nf b/module/gridss.nf index 18cf9b5..a79b935 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -60,7 +60,7 @@ process preprocess_BAM_GRIDSS { process run_assembly_GRIDSS { container params.docker_image_gridss - publishDir "${params.workflow_output_dir}/output", + publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", pattern: "${tumor_id}.assembly.bam", mode: "copy", saveAs: { @@ -71,7 +71,7 @@ process run_assembly_GRIDSS { pattern: "${tumor_id}.assembly.bam.gridss.working/*", mode: "copy", saveAs: { - "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + "${output_filename}.assembly.bam.gridss.working/${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" } publishDir "${params.log_output_dir}/process-log",