diff --git a/CHANGELOG.md b/CHANGELOG.md index a192ac60..e94a97cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Add compression of `SomaticSniper` `bam-readcount` output and move it to the `intermediate` directory - Add `ncbi_build` parameter - Add conversion of concatenated VCF to MAF - Add concatenation of consensus variants to one VCF @@ -18,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `split_VCF_bcftools` to `Mutect2` workflow, separating SNVs, MNVs and Indels ### Changed +- Remove redundant nested directories from Intersect log output paths +- Change compression of intersect MAF file to bzip2 - Update `README.md` - Use `set_env` from `pipeline-Nextflow-config` - Update resource allocation to include new processes diff --git a/config/default.config b/config/default.config index a052eb54..4e05233d 100644 --- a/config/default.config +++ b/config/default.config @@ -19,6 +19,7 @@ params { GATK_version = "4.4.0.0" somaticsniper_version = "1.0.5.0" bam_readcount_version = "0.8.0" + blarchive_version = "2.0.0" strelka2_version = "2.9.10" manta_version = "1.6.0" MuSE_version = "2.0.2" @@ -30,6 +31,7 @@ params { docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}" docker_image_somaticsniper = "${-> params.docker_container_registry}/somaticsniper:${params.somaticsniper_version}" docker_image_bam_readcount = "${-> params.docker_container_registry}/bam-readcount:${params.bam_readcount_version}" + docker_image_blarchive = "${-> params.docker_container_registry}/blarchive:${params.blarchive_version}" docker_image_strelka2 = "${-> params.docker_container_registry}/strelka2:${params.strelka2_version}" docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}" docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}" diff --git a/main.nf 
b/main.nf index 3ce44f91..4f14961b 100755 --- a/main.nf +++ b/main.nf @@ -83,8 +83,8 @@ include { muse } from './module/muse' addParams( [:])) include { intersect } from './module/intersect' addParams( - workflow_output_dir: "${params.output_dir_base}/intersect-BCFtools-${params.BCFtools_version}", - workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}", + workflow_output_dir: "${params.output_dir_base}/Intersect-BCFtools-${params.BCFtools_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/Intersect-BCFtools-${params.BCFtools_version}", output_filename: generate_standard_filename("BCFtools-${params.BCFtools_version}", params.dataset_id, params.sample_id, diff --git a/module/common.nf b/module/common.nf index 1af598fa..b5185a8e 100644 --- a/module/common.nf +++ b/module/common.nf @@ -5,6 +5,7 @@ log.info """\ Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} - docker_image_validate_params: ${params.docker_image_validate_params} +- docker_image_blarchive: ${params.docker_image_blarchive} """ process generate_sha512sum { @@ -63,3 +64,34 @@ process rename_samples_BCFtools { bcftools reheader -s ${params.output_filename}_samples.txt --output ${params.output_filename}_${var_type}.vcf.gz ${vcf} """ } + +// Compress one file to bzip2 with blarchive; emits [file_type, <file>.bz2] as compressed_file. +// The staged input symlink is resolved first; the script expects blarchive to leave <resolved path>.bz2 beside the input, which is then linked into the task directory. +// NOTE(review): the upstream file is compressed in place - confirm no other process still needs the uncompressed file while this runs. +process compress_file_blarchive { + container params.docker_image_blarchive + publishDir path: params.blarchive_publishDir, + mode: "copy", + pattern: "*.bz2", + enabled: params.blarchive_enabled + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.split(':')[-1]}-${file_type}/log${file(it).getName()}" } + + input: + tuple val(file_type), path(file_to_compress) + + output: + tuple val(file_type), path("*.bz2"), emit: compressed_file + path ".command.*" + + script: + """ + set -euo pipefail + dereferenced_file=\$(readlink -f ${file_to_compress}) + blarchive compress_files --input "\$dereferenced_file" \ 
+ --log "${params.work_dir}" + ln -s "\${dereferenced_file}.bz2" ${file_to_compress}.bz2 + """ + } diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 524d2143..6fac58de 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -5,6 +5,9 @@ log.info """\ Docker Images: - docker_image_BCFtools: ${params.docker_image_BCFtools} - docker_image_r_VennDiagram: ${params.docker_image_r_VennDiagram} +Intersect Options: +- ncbi_build: ${params.ncbi_build} +- vcf2maf_extra_args: ${params.vcf2maf_extra_args} ==================================== """ process intersect_VCFs_BCFtools { @@ -23,7 +26,7 @@ process intersect_VCFs_BCFtools { publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } input: path vcfs @@ -65,30 +68,30 @@ process intersect_VCFs_BCFtools { """ } - process plot_VennDiagram_R { - container params.docker_image_r_VennDiagram - publishDir path: "${params.workflow_output_dir}/output", - mode: "copy", - pattern: "*.tiff" - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } +process plot_VennDiagram_R { + container params.docker_image_r_VennDiagram + publishDir path: "${params.workflow_output_dir}/output", + mode: "copy", + pattern: "*.tiff" + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } - input: - path script_dir - path isec + input: + path script_dir + path isec - output: - path ".command.*" - path "*.tiff" + output: + path ".command.*" + path "*.tiff" - script: - """ - set -euo pipefail - Rscript ${script_dir}/plot-venn.R --isec_readme README.txt --isec_sites sites.txt --outfile 
${params.output_filename}_Venn-diagram.tiff - """ - } + script: + """ + set -euo pipefail + Rscript ${script_dir}/plot-venn.R --isec_readme README.txt --isec_sites sites.txt --outfile ${params.output_filename}_Venn-diagram.tiff + """ + } process concat_VCFs_BCFtools { container params.docker_image_BCFtools @@ -99,7 +102,7 @@ process concat_VCFs_BCFtools { publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } input: path vcfs @@ -134,7 +137,7 @@ process convert_VCF_vcf2maf { publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } input: path vcf @@ -160,27 +163,3 @@ process convert_VCF_vcf2maf { ${params.vcf2maf_extra_args} """ } - -process compress_MAF_vcf2maf { - container params.docker_image_vcf2maf - publishDir path: "${params.workflow_output_dir}/output", - mode: "copy", - pattern: "*.gz" - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } - - input: - path maf - - output: - path "*.gz", emit: concat_maf_gz - path ".command.*" - - script: - """ - set -euo pipefail - gzip --stdout ${maf} > ${maf}.gz - """ - } diff --git a/module/intersect.nf b/module/intersect.nf index 8dc630c5..94803ba3 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -1,5 +1,9 @@ include { generate_sha512sum } from './common' -include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf; compress_MAF_vcf2maf } from './intersect-processes.nf' +include { compress_file_blarchive} from './common' addParams( + blarchive_publishDir : "${params.workflow_output_dir}/output", 
+ blarchive_enabled : true + ) +include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf } from './intersect-processes.nf' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ output_dir: params.workflow_output_dir, @@ -51,7 +55,9 @@ workflow intersect { compress_index_VCF(concat_VCFs_BCFtools.out.concat_vcf .map{ it -> ['SNV', it]} ) - compress_MAF_vcf2maf(convert_VCF_vcf2maf.out.concat_maf) + compress_file_blarchive(convert_VCF_vcf2maf.out.concat_maf + .map{ it -> ['MAF', it]} + ) file_for_sha512 = intersect_VCFs_BCFtools.out.intersect_vcf .flatten() .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]} @@ -60,13 +66,13 @@ workflow intersect { .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]} ) .mix(compress_index_VCF.out.index_out - .map{ it -> ["intersect-${it[0]}-vcf", it[1]] } + .map{ it -> ["concat-${it[0]}-vcf", it[1]] } ) .mix(compress_index_VCF.out.index_out - .map{ it -> ["intersect-${it[0]}-index", it[2]] } + .map{ it -> ["concat-${it[0]}-index", it[2]] } ) - .mix(compress_MAF_vcf2maf.out.concat_maf_gz - .map{ it -> ["intersect-${file(it).getName().split('_')[0]}-maf", it]} + .mix(compress_file_blarchive.out.compressed_file + .map{ it -> ["concat-${it[0]}", it[1]]} ) generate_sha512sum(file_for_sha512) } diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index 8a743fc7..f7b6f5b1 100644 --- a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -6,7 +6,6 @@ log.info """\ Docker Images: - docker_image_somaticsniper: ${params.docker_image_somaticsniper} - docker_image_bam_readcount: ${params.docker_image_bam_readcount} - """ // Call SomaticSniper @@ -206,9 +205,6 @@ process create_ReadCountPosition_SomaticSniper { // Recommend to use the same mapping quality -q setting as SomaticSniper process generate_ReadCount_bam_readcount { container 
params.docker_image_bam_readcount - publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}", - mode: "copy", - pattern: "*.readcount" publishDir path: "${params.workflow_log_output_dir}", mode: "copy", pattern: ".command.*", diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index 7d7e5fff..1fa95f31 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -15,6 +15,10 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel bgzip_extra_args: params.bgzip_extra_args, tabix_extra_args: params.tabix_extra_args ]) +include { compress_file_blarchive} from './common' addParams( + blarchive_publishDir : "${params.workflow_output_dir}/intermediate/generate_ReadCount_bam_readcount", + blarchive_enabled : params.save_intermediate_files + ) workflow somaticsniper { take: @@ -51,14 +55,19 @@ workflow somaticsniper { generate_ReadCount_bam_readcount(params.reference,create_ReadCountPosition_SomaticSniper.out.snp_positions, tumor_bam, tumor_index) filter_FalsePositive_SomaticSniper(apply_TumorIndelFilter_SomaticSniper.out.vcf_tumor, generate_ReadCount_bam_readcount.out.readcount) call_HighConfidenceSNV_SomaticSniper(filter_FalsePositive_SomaticSniper.out.fp_pass) + compress_file_blarchive(generate_ReadCount_bam_readcount.out.readcount + .map{ it -> ['readcount', it] }) // rename_samples_BCFtools needs bgzipped input compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc .map{ it -> ['SNV', it] }) rename_samples_BCFtools(normal_id, tumor_id, compress_index_VCF_hc.out.index_out .map{ it -> [it[0], it[1]] }) compress_index_VCF_fix(rename_samples_BCFtools.out.fix_vcf) - file_for_sha512 = compress_index_VCF_fix.out.index_out.map{ it -> ["somaticsniper-${it[0]}-vcf", it[1]] } - .mix(compress_index_VCF_fix.out.index_out.map{ it -> ["somaticsniper-${it[0]}-index", it[2]] }) + file_for_sha512 = compress_index_VCF_fix.out.index_out + .map{ it -> ["${it[0]}-vcf", it[1]] } + 
.mix(compress_index_VCF_fix.out.index_out + .map{ it -> ["${it[0]}-index", it[2]] } + ) generate_sha512sum(file_for_sha512) emit: vcf = compress_index_VCF_fix.out.index_out.map{ it -> ["${it[1]}"] } diff --git a/test/config/a_mini-all-tools.config b/test/config/a_mini-all-tools.config index 8c2c2e41..bf5f9a4a 100644 --- a/test/config/a_mini-all-tools.config +++ b/test/config/a_mini-all-tools.config @@ -17,7 +17,7 @@ params { dataset_id = 'TWGSAMIN' // setting params.exome to TRUE will add the '--exome' option when running manta and strelka2 and the -E option when running MuSE exome = false - save_intermediate_files = true + save_intermediate_files = false // module options bgzip_extra_args = ''