Skip to content

Commit

Permalink
Merge pull request #203 from uclahs-cds/sfitz-compress-readcounts
Browse files Browse the repository at this point in the history
Sfitz compress readcounts
  • Loading branch information
sorelfitzgibbon authored Aug 30, 2023
2 parents ae5698f + a1b797e commit 1ac35bd
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 63 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Add compression of `SomaticSniper` `bam-readcount` output and move to `intermediate` directory
- Add `ncbi_build` parameter
- Add conversion of concatenated VCF to MAF
- Add concatenation of consensus variants to one VCF
Expand All @@ -18,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `split_VCF_bcftools` to `Mutect2` workflow, separating SNVs, MNVs and Indels

### Changed
- Remove redundant subdirectory levels from Intersect log output paths
- Change compression of intersect MAF file to bzip2
- Update `README.md`
- Use `set_env` from `pipeline-Nextflow-config`
- Update resource allocation to include new processes
Expand Down
2 changes: 2 additions & 0 deletions config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ params {
GATK_version = "4.4.0.0"
somaticsniper_version = "1.0.5.0"
bam_readcount_version = "0.8.0"
blarchive_version = "2.0.0"
strelka2_version = "2.9.10"
manta_version = "1.6.0"
MuSE_version = "2.0.2"
Expand All @@ -30,6 +31,7 @@ params {
docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}"
docker_image_somaticsniper = "${-> params.docker_container_registry}/somaticsniper:${params.somaticsniper_version}"
docker_image_bam_readcount = "${-> params.docker_container_registry}/bam-readcount:${params.bam_readcount_version}"
docker_image_blarchive = "${-> params.docker_container_registry}/blarchive:${params.blarchive_version}"
docker_image_strelka2 = "${-> params.docker_container_registry}/strelka2:${params.strelka2_version}"
docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}"
docker_image_MuSE = "${-> params.docker_container_registry}/muse:${params.MuSE_version}"
Expand Down
4 changes: 2 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ include { muse } from './module/muse' addParams(
[:]))

include { intersect } from './module/intersect' addParams(
workflow_output_dir: "${params.output_dir_base}/intersect-BCFtools-${params.BCFtools_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/intersect-BCFtools-${params.BCFtools_version}",
workflow_output_dir: "${params.output_dir_base}/Intersect-BCFtools-${params.BCFtools_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/Intersect-BCFtools-${params.BCFtools_version}",
output_filename: generate_standard_filename("BCFtools-${params.BCFtools_version}",
params.dataset_id,
params.sample_id,
Expand Down
29 changes: 29 additions & 0 deletions module/common.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ log.info """\
Docker Images:
- docker_image_BCFtools: ${params.docker_image_BCFtools}
- docker_image_validate_params: ${params.docker_image_validate_params}
- docker_image_blarchive: ${params.docker_image_blarchive}
"""

process generate_sha512sum {
Expand Down Expand Up @@ -63,3 +64,31 @@ process rename_samples_BCFtools {
bcftools reheader -s ${params.output_filename}_samples.txt --output ${params.output_filename}_${var_type}.vcf.gz ${vcf}
"""
}

// Compress one file to bzip2 using the blarchive container and emit the
// compressed file together with the label it arrived with.
//
// Input:
//   tuple (file_type, file_to_compress)
//     file_type        - label value; appended to the log sub-directory name
//                        and passed through unchanged in the output tuple
//     file_to_compress - path of the file handed to `blarchive compress_files`
// Output:
//   compressed_file - tuple (file_type, "*.bz2")
//   ".command.*"    - the task's command/log files
// Params read: docker_image_blarchive, blarchive_publishDir, blarchive_enabled,
//   workflow_log_output_dir, work_dir.
// NOTE(review): blarchive_publishDir and blarchive_enabled are expected to be
//   supplied by the including module (e.g. through addParams) - confirm every
//   include sets both.
process compress_file_blarchive {
container params.docker_image_blarchive
// The .bz2 result is copied to blarchive_publishDir only when blarchive_enabled is true.
publishDir path: params.blarchive_publishDir,
mode: "copy",
pattern: "*.bz2",
enabled: params.blarchive_enabled
// Log files go to "<last ':'-separated part of the process name>-<file_type>/log<original file name>".
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.split(':')[-1]}-${file_type}/log${file(it).getName()}" }

input:
tuple val(file_type), path(file_to_compress)

output:
tuple val(file_type), path("*.bz2"), emit: compressed_file
path ".command.*"

// Script outline:
//   1. `readlink -f` resolves the input name to its real path.
//   2. blarchive is run on that resolved path, with --log set to params.work_dir.
//   3. A symlink "<input name>.bz2" -> "<resolved path>.bz2" is created so that
//      the "*.bz2" output pattern matches in the task directory.
// NOTE(review): step 3 presumes blarchive writes "<resolved path>.bz2" next to
//   the resolved input file - confirm against the blarchive documentation.
script:
"""
set -euo pipefail
dereferenced_file=\$(readlink -f ${file_to_compress})
blarchive compress_files --input \$dereferenced_file \
--log ${params.work_dir}
ln -s \${dereferenced_file}.bz2 ${file_to_compress}.bz2
"""
}
75 changes: 27 additions & 48 deletions module/intersect-processes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ log.info """\
Docker Images:
- docker_image_BCFtools: ${params.docker_image_BCFtools}
- docker_image_r_VennDiagram: ${params.docker_image_r_VennDiagram}
Intersect Options:
- ncbi_build: ${params.ncbi_build}
- vcf2maf_extra_args: ${params.vcf2maf_extra_args}
====================================
"""
process intersect_VCFs_BCFtools {
Expand All @@ -23,7 +26,7 @@ process intersect_VCFs_BCFtools {
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" }
saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" }

input:
path vcfs
Expand Down Expand Up @@ -65,30 +68,30 @@ process intersect_VCFs_BCFtools {
"""
}

process plot_VennDiagram_R {
container params.docker_image_r_VennDiagram
publishDir path: "${params.workflow_output_dir}/output",
mode: "copy",
pattern: "*.tiff"
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" }
process plot_VennDiagram_R {
container params.docker_image_r_VennDiagram
publishDir path: "${params.workflow_output_dir}/output",
mode: "copy",
pattern: "*.tiff"
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" }

input:
path script_dir
path isec
input:
path script_dir
path isec

output:
path ".command.*"
path "*.tiff"
output:
path ".command.*"
path "*.tiff"

script:
"""
set -euo pipefail
Rscript ${script_dir}/plot-venn.R --isec_readme README.txt --isec_sites sites.txt --outfile ${params.output_filename}_Venn-diagram.tiff
"""
}
script:
"""
set -euo pipefail
Rscript ${script_dir}/plot-venn.R --isec_readme README.txt --isec_sites sites.txt --outfile ${params.output_filename}_Venn-diagram.tiff
"""
}

process concat_VCFs_BCFtools {
container params.docker_image_BCFtools
Expand All @@ -99,7 +102,7 @@ process concat_VCFs_BCFtools {
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" }
saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" }

input:
path vcfs
Expand Down Expand Up @@ -134,7 +137,7 @@ process convert_VCF_vcf2maf {
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" }
saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" }

input:
path vcf
Expand All @@ -160,27 +163,3 @@ process convert_VCF_vcf2maf {
${params.vcf2maf_extra_args}
"""
}

// Gzip-compress the given MAF file and publish the compressed copy.
//
// Input:
//   maf - path of the file to compress
// Output:
//   concat_maf_gz - the "*.gz" file written by the script ("<maf>.gz")
//   ".command.*"  - the task's command/log files
// Params read: docker_image_vcf2maf, workflow_output_dir, workflow_log_output_dir.
process compress_MAF_vcf2maf {
container params.docker_image_vcf2maf
// The compressed file is copied to "<workflow_output_dir>/output".
publishDir path: "${params.workflow_output_dir}/output",
mode: "copy",
pattern: "*.gz"
// Log files go to "<process name with ':' replaced by '/'>/log<original file name>".
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" }

input:
path maf

output:
path "*.gz", emit: concat_maf_gz
path ".command.*"

// gzip writes to stdout (--stdout), which is redirected to "<maf>.gz";
// the input file itself is not replaced.
script:
"""
set -euo pipefail
gzip --stdout ${maf} > ${maf}.gz
"""
}
18 changes: 12 additions & 6 deletions module/intersect.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
include { generate_sha512sum } from './common'
include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf; compress_MAF_vcf2maf } from './intersect-processes.nf'
include { compress_file_blarchive} from './common' addParams(
blarchive_publishDir : "${params.workflow_output_dir}/output",
blarchive_enabled : true
)
include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf } from './intersect-processes.nf'
include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams(
options: [
output_dir: params.workflow_output_dir,
Expand Down Expand Up @@ -51,7 +55,9 @@ workflow intersect {
compress_index_VCF(concat_VCFs_BCFtools.out.concat_vcf
.map{ it -> ['SNV', it]}
)
compress_MAF_vcf2maf(convert_VCF_vcf2maf.out.concat_maf)
compress_file_blarchive(convert_VCF_vcf2maf.out.concat_maf
.map{ it -> ['MAF', it]}
)
file_for_sha512 = intersect_VCFs_BCFtools.out.intersect_vcf
.flatten()
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]}
Expand All @@ -60,13 +66,13 @@ workflow intersect {
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]}
)
.mix(compress_index_VCF.out.index_out
.map{ it -> ["intersect-${it[0]}-vcf", it[1]] }
.map{ it -> ["concat-${it[0]}-vcf", it[1]] }
)
.mix(compress_index_VCF.out.index_out
.map{ it -> ["intersect-${it[0]}-index", it[2]] }
.map{ it -> ["concat-${it[0]}-index", it[2]] }
)
.mix(compress_MAF_vcf2maf.out.concat_maf_gz
.map{ it -> ["intersect-${file(it).getName().split('_')[0]}-maf", it]}
.mix(compress_file_blarchive.out.compressed_file
.map{ it -> ["concat-${it[0]}", it[1]]}
)
generate_sha512sum(file_for_sha512)
}
4 changes: 0 additions & 4 deletions module/somaticsniper-processes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ log.info """\
Docker Images:
- docker_image_somaticsniper: ${params.docker_image_somaticsniper}
- docker_image_bam_readcount: ${params.docker_image_bam_readcount}
"""

// Call SomaticSniper
Expand Down Expand Up @@ -206,9 +205,6 @@ process create_ReadCountPosition_SomaticSniper {
// It is recommended to use the same mapping quality (-q) setting as SomaticSniper
process generate_ReadCount_bam_readcount {
container params.docker_image_bam_readcount
publishDir path: "${params.workflow_output_dir}/QC/${task.process.split(':')[-1]}",
mode: "copy",
pattern: "*.readcount"
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
Expand Down
13 changes: 11 additions & 2 deletions module/somaticsniper.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ include { compress_index_VCF as compress_index_VCF_fix } from '../external/pipel
bgzip_extra_args: params.bgzip_extra_args,
tabix_extra_args: params.tabix_extra_args
])
include { compress_file_blarchive} from './common' addParams(
blarchive_publishDir : "${params.workflow_output_dir}/intermediate/generate_ReadCount_bam_readcount",
blarchive_enabled : params.save_intermediate_files
)

workflow somaticsniper {
take:
Expand Down Expand Up @@ -51,14 +55,19 @@ workflow somaticsniper {
generate_ReadCount_bam_readcount(params.reference,create_ReadCountPosition_SomaticSniper.out.snp_positions, tumor_bam, tumor_index)
filter_FalsePositive_SomaticSniper(apply_TumorIndelFilter_SomaticSniper.out.vcf_tumor, generate_ReadCount_bam_readcount.out.readcount)
call_HighConfidenceSNV_SomaticSniper(filter_FalsePositive_SomaticSniper.out.fp_pass)
compress_file_blarchive(generate_ReadCount_bam_readcount.out.readcount
.map{ it -> ['readcount', it] })
// rename_samples_BCFtools needs bgzipped input
compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc
.map{ it -> ['SNV', it] })
rename_samples_BCFtools(normal_id, tumor_id, compress_index_VCF_hc.out.index_out
.map{ it -> [it[0], it[1]] })
compress_index_VCF_fix(rename_samples_BCFtools.out.fix_vcf)
file_for_sha512 = compress_index_VCF_fix.out.index_out.map{ it -> ["somaticsniper-${it[0]}-vcf", it[1]] }
.mix(compress_index_VCF_fix.out.index_out.map{ it -> ["somaticsniper-${it[0]}-index", it[2]] })
file_for_sha512 = compress_index_VCF_fix.out.index_out
.map{ it -> ["${it[0]}-vcf", it[1]] }
.mix(compress_index_VCF_fix.out.index_out
.map{ it -> ["${it[0]}-index", it[2]] }
)
generate_sha512sum(file_for_sha512)
emit:
vcf = compress_index_VCF_fix.out.index_out.map{ it -> ["${it[1]}"] }
Expand Down
2 changes: 1 addition & 1 deletion test/config/a_mini-all-tools.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ params {
dataset_id = 'TWGSAMIN'
// setting params.exome to TRUE will add the '--exome' option when running manta and strelka2 and the -E option when running MuSE
exome = false
save_intermediate_files = true
save_intermediate_files = false

// module options
bgzip_extra_args = ''
Expand Down

0 comments on commit 1ac35bd

Please sign in to comment.