From 3d1f16de73770b7b8ef9381d14eb5a3b5887901b Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 10:08:25 -0700 Subject: [PATCH 01/42] Rename workflow to snv_annotations, absorb Funcotator --- main.nf | 17 +++---- module/extract_features.nf | 2 +- module/funcotator.nf | 44 ----------------- module/{annotations.nf => snv_annotations.nf} | 48 +++++++++++++++++-- 4 files changed, 52 insertions(+), 59 deletions(-) delete mode 100644 module/funcotator.nf rename module/{annotations.nf => snv_annotations.nf} (78%) diff --git a/main.nf b/main.nf index 1e45a85..ddd0a1e 100644 --- a/main.nf +++ b/main.nf @@ -11,8 +11,7 @@ include { run_validate_PipeVal_with_metadata } from './external/pipeline-Nextflo ) include { run_liftover_BCFtools } from './module/liftover.nf' -include { run_Funcotator_GATK } from './module/funcotator.nf' -include { workflow_apply_annotations } from './module/annotations.nf' +include { workflow_apply_snv_annotations } from './module/snv_annotations.nf' include { workflow_extract_features} from './module/extract_features.nf' // Log info here @@ -142,6 +141,8 @@ workflow { .set { validated_vcf_with_index } // The values of validated_vcf_with_index are maps with keys vcf, index, and sample_id. 
+ + // Step 1: Liftover run_liftover_BCFtools( validated_vcf_with_index.map { [it.sample_id, it.vcf, it.index] }, input_ch_src_sequence, @@ -149,18 +150,14 @@ workflow { Channel.value(params.chain_file) ) - run_Funcotator_GATK( + // Step 2: Annotate + workflow_apply_snv_annotations( run_liftover_BCFtools.out.liftover_vcf_with_index, - input_ch_dest_sequence, - Channel.value(params.funcotator_data.data_source) - ) - - workflow_apply_annotations( - run_Funcotator_GATK.out.funcotator_vcf, input_ch_dest_sequence ) + // Step 3: Analyze workflow_extract_features( - workflow_apply_annotations.out.annotated_vcf + workflow_apply_snv_annotations.out.annotated_vcf ) } diff --git a/module/extract_features.nf b/module/extract_features.nf index d5b385f..f7b22bc 100644 --- a/module/extract_features.nf +++ b/module/extract_features.nf @@ -1,4 +1,4 @@ -include { compress_and_index_HTSlib } from './annotations.nf' +include { compress_and_index_HTSlib } from './snv_annotations.nf' process extract_VCF_features_StableLift { container params.docker_image_stablelift diff --git a/module/funcotator.nf b/module/funcotator.nf deleted file mode 100644 index 1917ae0..0000000 --- a/module/funcotator.nf +++ /dev/null @@ -1,44 +0,0 @@ -/* -* Module/process description here -* -* @input -* @params -* @output -*/ - -include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' - -process run_Funcotator_GATK { - container params.docker_image_gatk - - publishDir path: "${intermediate_filepath}", - pattern: "output.vcf.gz", - mode: "copy", - enabled: params.save_intermediate_files, - saveAs: { "${slug}.vcf.gz" } - - input: - tuple val(sample_id), - path(vcf, stageAs: 'inputs/*'), - path(index, stageAs: 'inputs/*') - tuple val(dest_fasta_id), path(dest_fasta_ref), path(dest_fasta_fai), path(dest_fasta_dict) - path (funcotator_sources) - - output: - tuple val(sample_id), path('output.vcf.gz'), emit: funcotator_vcf - - script: - 
intermediate_filepath = "${params.output_dir_base}/GATK-${params.gatk_version}/intermediate/${task.process}" - - slug = "Funcotator-${sample_id}-${dest_fasta_id}" - - """ - gatk Funcotator \ - --variant "${vcf}" \ - --reference "${dest_fasta_ref}" \ - --ref-version "${dest_fasta_id}" \ - --data-sources-path "${funcotator_sources}" \ - --output-file-format VCF \ - --output "output.vcf.gz" - """ -} diff --git a/module/annotations.nf b/module/snv_annotations.nf similarity index 78% rename from module/annotations.nf rename to module/snv_annotations.nf index 2295c79..705855f 100644 --- a/module/annotations.nf +++ b/module/snv_annotations.nf @@ -1,3 +1,38 @@ +process run_Funcotator_GATK { + container params.docker_image_gatk + + publishDir path: "${intermediate_filepath}", + pattern: "output.vcf.gz", + mode: "copy", + enabled: params.save_intermediate_files, + saveAs: { "${slug}.vcf.gz" } + + input: + tuple val(sample_id), + path(vcf, stageAs: 'inputs/*'), + path(index, stageAs: 'inputs/*') + tuple val(dest_fasta_id), path(dest_fasta_ref), path(dest_fasta_fai), path(dest_fasta_dict) + path (funcotator_sources) + + output: + tuple val(sample_id), path('output.vcf.gz'), emit: funcotator_vcf + + script: + intermediate_filepath = "${params.output_dir_base}/GATK-${params.gatk_version}/intermediate/${task.process}" + + slug = "Funcotator-${sample_id}-${dest_fasta_id}" + + """ + gatk Funcotator \ + --variant "${vcf}" \ + --reference "${dest_fasta_ref}" \ + --ref-version "${dest_fasta_id}" \ + --data-sources-path "${funcotator_sources}" \ + --output-file-format VCF \ + --output "output.vcf.gz" + """ +} + process annotate_RepeatMasker_BCFtools { container params.docker_image_bcftools @@ -141,16 +176,21 @@ process annotate_trinucleotide_BCFtools { """ } - - -workflow workflow_apply_annotations { +workflow workflow_apply_snv_annotations { take: vcf_with_sample_id dest_fasta_data main: - annotate_RepeatMasker_BCFtools( + + run_Funcotator_GATK( vcf_with_sample_id, + dest_fasta_data, 
+ Channel.value(params.funcotator_data.data_source) + ) + + annotate_RepeatMasker_BCFtools( + run_Funcotator_GATK.out.funcotator_vcf, Channel.value(params.repeat_bed) ) From 7b773abbe8d625687b0ec37f5d6c419424e3b423 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 10:15:58 -0700 Subject: [PATCH 02/42] s/RepeatMasker-v3.0.1/RepeatMasker-3.0.1/ --- config/template.config | 2 +- test/nftest.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/template.config b/config/template.config index 06c8741..6746bc5 100644 --- a/config/template.config +++ b/config/template.config @@ -36,7 +36,7 @@ params { chain_file = "/hot/ref/tool-specific-input/liftOver/hg19ToHg38.over.chain" // FIXME How to describe this file? - repeat_bed = "/hot/ref/database/RepeatMasker-v3.0.1/processed/GRCh38/GRCh38_RepeatMasker_intervals.bed" + repeat_bed = "/hot/ref/database/RepeatMasker-3.0.1/processed/GRCh38/GRCh38_RepeatMasker_intervals.bed" } // Setup the pipeline config. DO NOT REMOVE THIS LINE! diff --git a/test/nftest.config b/test/nftest.config index 2c9cbff..30ddb6e 100644 --- a/test/nftest.config +++ b/test/nftest.config @@ -30,7 +30,7 @@ params { // references chain_file = "/hot/ref/tool-specific-input/liftOver/hg19ToHg38.over.chain" - repeat_bed = "/hot/ref/database/RepeatMasker-v3.0.1/processed/GRCh38/GRCh38_RepeatMasker_intervals.bed" + repeat_bed = "/hot/ref/database/RepeatMasker-3.0.1/processed/GRCh38/GRCh38_RepeatMasker_intervals.bed" } // Setup the pipeline config. DO NOT REMOVE THIS LINE! 
From ad98a45f1bd11398da199262503d301316563f8c Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 10:19:57 -0700 Subject: [PATCH 03/42] Use stablelift image from main --- config/default.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/default.config b/config/default.config index 0ee67bd..b4fb07c 100644 --- a/config/default.config +++ b/config/default.config @@ -22,7 +22,7 @@ params { gatk_version = '4.2.4.1' pipeval_version = '5.0.0-rc.3' samtools_version = '1.20' - stablelift_version = 'branch-nwiltsie-bootstrap' // FIXME + stablelift_version = 'dev' // FIXME docker_image_bcftools = "${-> params.docker_container_registry}/bcftools-score:${params.bcftools_version}" docker_image_bedtools = "${-> params.docker_container_registry}/bedtools:${params.bedtools_version}" From dae4905cb0346ea1f148ebc0b486bc4043059767 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 12:33:41 -0700 Subject: [PATCH 04/42] Add original copy of extract-vcf-features-SV.R --- module/scripts/extract-vcf-features-SV.R | 182 +++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 module/scripts/extract-vcf-features-SV.R diff --git a/module/scripts/extract-vcf-features-SV.R b/module/scripts/extract-vcf-features-SV.R new file mode 100644 index 0000000..c10fe9e --- /dev/null +++ b/module/scripts/extract-vcf-features-SV.R @@ -0,0 +1,182 @@ +#!/usr/bin/env Rscript +# extract-vcf-features-SV.R +#################################################################################################### +# +# Extract features from vcf +# Intersect and annotate with gnomAD-SV vcf +# +#################################################################################################### + +suppressPackageStartupMessages({ + library(vcfR); + library(data.table); + library(argparse); + library(GenomicRanges); + }); + +################################################################################################### +# Input 
+################################################################################################### +# Define command line arguments +parser <- ArgumentParser(); +parser$add_argument('--variant-caller', type = 'character', help = ''); +parser$add_argument('--input-vcf', type = 'character', help = 'Delly2 vcf'); +parser$add_argument('--gnomad-rds', type = 'character', help = 'gnomAD Rds file'); +args <- parser$parse_args(); + +# Save command line arguments +for (arg in names(args)) { + assign(gsub('_', '.', arg), args[[arg]]); + } + +# Set parameters for interactive runs +if (interactive()) { + variant.caller <- 'Delly2'; + input.vcf <- '/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/sSV/stableLift/train_CPCG-40QC_Delly2/CPCG-40QC_Delly2_LiftOver-GRCh38.vcf.gz'; + gnomad.rds <- '/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/data/gnomad.v4.0.sv.Rds'; + } + +output.path <- sub('\\..*$', '_annotated.Rds', input.vcf); + +################################################################################################### +# Functions +################################################################################################### +vcf.info.to.dt <- function(vcf.info) { + # Split each string by semicolon and convert to a list of key-value pairs + vcf.info <- strsplit(vcf.info, ';'); + vcf.info <- lapply(vcf.info, function(x) { + x <- strsplit(x, '='); + as.list(stats::setNames(sapply(x, `[`, 2), sapply(x, `[`, 1))); + }) + + # Combine the list of key-value pairs into a data table + rbindlist(vcf.info, fill = TRUE); + } + +calculate.VAF <- function(GT.row) { + total <- sum(GT.row %in% c('0/0', '0/1', '1/1'), na.rm = TRUE) * 2; + alt <- sum(GT.row == '0/1', na.rm = TRUE) + sum(GT.row == '1/1', na.rm = TRUE) * 2; + return(alt / total); + } + +get.overlap <- function(start1, end1, start2, end2) { + max.length <- pmax((end1 - start1), (end2 - start2)); + overlap.length <- pmin(end1, end2) 
- pmax(start1, start2); + return(overlap.length / max.length); + } + +find.SV.match <- function(this.ID, input, reference, overlap, offset) { + # Match SV type and CHR + this.variant <- input[ID == this.ID]; + reference <- reference[SVTYPE == this.variant$SVTYPE & CHROM == this.variant$CHROM]; + + if (this.variant$SVTYPE == 'BND') { + # reference[, OFFSET := abs(POS - this.variant$POS) + abs(POS2 - this.variant$POS2)]; + reference[, OFFSET := abs(POS - this.variant$POS)]; + reference <- reference[OFFSET < offset & CHR2 == this.variant$CHR2][order(OFFSET)]; + } else { + reference[, OVERLAP := get.overlap(POS, END, this.variant$POS, this.variant$END)]; + reference <- reference[OVERLAP > overlap][order(OVERLAP, decreasing = TRUE)]; + } + + return(list(gnomad.match.ID = reference[1, ID], gnomad.matches = nrow(reference))); + } + +################################################################################################### +# Load files +################################################################################################### +input.vcf <- read.vcfR(input.vcf); +features.dt.gnomad <- readRDS(gnomad.rds); + +################################################################################################### +# Data preprocessing +################################################################################################### +# Convert variant information into dt +input.info <- vcf.info.to.dt(input.vcf@fix[, 'INFO']); +input.fix <- as.data.table(input.vcf@fix); +features.dt <- cbind(input.fix[, -c('INFO')], input.info); + +# Format columns +features.dt[, CONSENSUS := NULL]; +numeric.columns <- c('POS', 'QUAL', 'END', 'PE', 'MAPQ', 'SRMAPQ', 'INSLEN', 'HOMLEN', 'SR', 'SRQ', 'CE', 'RDRATIO', 'SVLEN', 'POS2'); +features.dt[, (numeric.columns) := lapply(.SD, as.numeric), .SDcols = numeric.columns]; + +# Extract and aggregate per sample GT fields +gt.fields <- c('GQ', 'RC', 'RDCN', 'DR', 'DV', 'RR', 'RV'); +for (field in gt.fields) { + features.dt[, (field) 
:= apply(extract.gt(input.vcf, element = ..field, as.numeric = TRUE), 1, mean, na.rm = TRUE)]; + } +features.dt[, COHORT_AF := apply(extract.gt(input.vcf, element = 'GT'), 1, calculate.VAF)]; +features.dt[!SVTYPE %in% c('BND', 'INS'), SVLEN := END - POS + 1]; +features.dt[, CIPOS := as.numeric(sapply(CIPOS, function(x) unlist(strsplit(x, ','))[2]))]; + +################################################################################################### +# Intersect variants with gnomAD SVs +################################################################################################### +start.time <- Sys.time(); + +# features.dt <- features.dt[1:100]; +features.dt[, c('gnomad.match.ID', 'gnomad.matches') := rbindlist(lapply(ID, find.SV.match, input = features.dt, reference = features.dt.gnomad, overlap = 0.8, offset = 500))]; + +gnomad.features <- c('ID', 'AF', 'POPMAX_AF', 'NCR'); +features.dt <- merge(features.dt, features.dt.gnomad[, ..gnomad.features], all.x = TRUE, by.x = 'gnomad.match.ID', by.y = 'ID'); + +cat(format(Sys.time() - start.time, nsmall = 2), '\n'); + +################################################################################################### +# Format features for RF +################################################################################################### +continuous.features <- c( + 'POS', + 'QUAL', + 'END', + 'PE', + 'MAPQ', + 'CIPOS', + 'SRMAPQ', + 'HOMLEN', + 'SR', + 'SRQ', + 'CE', + 'RDRATIO', + 'SVLEN', + 'GQ', + 'RC', + 'RDCN', + 'DR', + 'DV', + 'RR', + 'RV', + 'gnomad.matches', + 'AF', + 'POPMAX_AF', + 'NCR' + ); + +categorical.features <- c( + 'CHROM', + 'SVTYPE', + 'CT' + ); + +# Extract features and format +continuous.features <- continuous.features[continuous.features %in% names(features.dt)]; +categorical.features <- categorical.features[categorical.features %in% names(features.dt)]; +all.features <- c(continuous.features, categorical.features, 'ID'); + +features.dt <- features.dt[, ..all.features]; 
+features.dt[, (continuous.features) := lapply(.SD, as.numeric), .SDcols = continuous.features]; +features.dt[, (continuous.features) := lapply(.SD, function(x) ifelse(is.na(x), 0, x)), .SDcols = continuous.features]; +features.dt[, (categorical.features) := lapply(.SD, function(x) ifelse(is.na(x), '', x)), .SDcols = categorical.features]; +features.dt[, (categorical.features) := lapply(.SD, as.factor), .SDcols = categorical.features]; +names(features.dt) <- make.names(names(features.dt)); + +# Remove rows with NA +features.dt.rows <- nrow(features.dt); +features.dt <- features.dt[apply(features.dt, 1, function(x) !any(is.na(x))), ]; +cat('Removed', features.dt.rows - nrow(features.dt), 'rows with missing data\n'); + +################################################################################################### +# Save features.dt for input to RF +################################################################################################### +saveRDS(features.dt, output.path); From 5df8c97048f3ebd0d3288b0d5ade21586fc43e6e Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 12:35:09 -0700 Subject: [PATCH 05/42] Add --output-rds argument --- module/scripts/extract-vcf-features-SV.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/module/scripts/extract-vcf-features-SV.R b/module/scripts/extract-vcf-features-SV.R index c10fe9e..9927a7e 100644 --- a/module/scripts/extract-vcf-features-SV.R +++ b/module/scripts/extract-vcf-features-SV.R @@ -21,6 +21,7 @@ suppressPackageStartupMessages({ parser <- ArgumentParser(); parser$add_argument('--variant-caller', type = 'character', help = ''); parser$add_argument('--input-vcf', type = 'character', help = 'Delly2 vcf'); +parser$add_argument('--output-rds', type = 'character', help = 'Rds output for use in RF model'); parser$add_argument('--gnomad-rds', type = 'character', help = 'gnomAD Rds file'); args <- parser$parse_args(); @@ -36,8 +37,6 @@ if (interactive()) { gnomad.rds <- 
'/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/data/gnomad.v4.0.sv.Rds'; } -output.path <- sub('\\..*$', '_annotated.Rds', input.vcf); - ################################################################################################### # Functions ################################################################################################### @@ -179,4 +178,4 @@ cat('Removed', features.dt.rows - nrow(features.dt), 'rows with missing data\n') ################################################################################################### # Save features.dt for input to RF ################################################################################################### -saveRDS(features.dt, output.path); +saveRDS(features.dt, output.rds); From 4716ac52ca0017ed0923e140103e3aeae3bdf37c Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 12:37:08 -0700 Subject: [PATCH 06/42] Add workflow for SV --- module/sv_workflow.nf | 92 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 module/sv_workflow.nf diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf new file mode 100644 index 0000000..7fcd089 --- /dev/null +++ b/module/sv_workflow.nf @@ -0,0 +1,92 @@ + +process run_sv_liftover{ + container params.docker_image_stablelift + + publishDir path: "${intermediate_filepath}", + pattern: "liftover.vcf.gz", + mode: "copy", + enabled: params.save_intermediate_files, + saveAs: { "${slug}.vcf.gz" } + + input: + tuple val(sample_id), + path(vcf, stageAs: 'inputs/*'), + path(index, stageAs: 'inputs/*') + path (header_contigs) + path (chain_file) + + output: + tuple val(sample_id), path('liftover.vcf.gz'), emit: liftover_vcf + + script: + // FIXME Use a more standard path + intermediate_filepath = "${params.output_dir_base}/StableLift-${params.stablelift_version}/intermediate/${task.process}" + + slug = "LiftOver-${sample_id}" + + """ + Rscript 
"${moduleDir}/scripts/liftover-Delly2-vcf.R" \ + --input-vcf ${vcf} \ + --header-contigs ${header_contigs} \ + --chain-file ${chain_file} \ + --output "liftover.vcf.gz" + """ +} + +process run_intersect_gnomad { + container params.docker_image_stablelift + + publishDir path: "${intermediate_filepath}", + pattern: "annotations.Rds", + mode: "copy", + enabled: params.save_intermediate_files, + saveAs: { "${slug}.Rds" } + + input: + tuple val(sample_id), path(vcf, stageAs: 'inputs/*') + path (gnomad_rds) + val (variant_caller) + + output: + tuple val(sample_id), path('annotations.Rds'), emit: r_annotations + + script: + // FIXME Use a more standard path + intermediate_filepath = "${params.output_dir_base}/StableLift-${params.stablelift_version}/intermediate/${task.process}" + + slug = "LiftOver-${sample_id}-${variant_caller}" + + """ + Rscript "${moduleDir}/scripts/extract-vcf-features-SV.R" \ + --variant-caller "${variant_caller}" \ + --input-vcf "${vcf}" \ + --output-rds "annotations.Rds" \ + --gnomad-rds "${gnomad_rds}" + """ +} + +workflow workflow_extract_sv_annotations { + take: + vcf_with_sample_id + header_contigs + gnomad_rds + chain_file + variant_caller + + main: + + run_sv_liftover( + vcf_with_sample_id, + header_contigs, + chain_file + ) + + run_intersect_gnomad( + run_sv_liftover.out.liftover_vcf, + gnomad_rds, + variant_caller + ) + + emit: + r_annotations = run_intersect_gnomad.out.r_annotations +} From 8bbcfe23757f2fc76e4e59e9383a4c063edf5115 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 13:21:42 -0700 Subject: [PATCH 07/42] Refactor, support SV and SNV --- config/template.config | 7 + main.nf | 56 ++++--- ...tract_features.nf => predict_stability.nf} | 16 +- module/scripts/liftover-Delly2-vcf.R | 157 ++++++++++++++++++ module/snv_annotations.nf | 33 +--- module/{liftover.nf => snv_workflow.nf} | 75 ++++++++- module/sv_workflow.nf | 3 + module/utils.nf | 30 ++++ 8 files changed, 308 insertions(+), 69 deletions(-) rename 
module/{extract_features.nf => predict_stability.nf} (89%) create mode 100644 module/scripts/liftover-Delly2-vcf.R rename module/{liftover.nf => snv_workflow.nf} (52%) create mode 100644 module/utils.nf diff --git a/config/template.config b/config/template.config index 6746bc5..e30e086 100644 --- a/config/template.config +++ b/config/template.config @@ -37,6 +37,13 @@ params { // FIXME How to describe this file? repeat_bed = "/hot/ref/database/RepeatMasker-3.0.1/processed/GRCh38/GRCh38_RepeatMasker_intervals.bed" + + // SV files + // FIXME Should this be bundled? + header_contigs = "/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/GRCh38-vcf-header-contigs.txt" + + // FIXME Should this be bundled? + gnomad_rds = "/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/data/gnomad.v4.0.sv.Rds" } // Setup the pipeline config. DO NOT REMOVE THIS LINE! diff --git a/main.nf b/main.nf index ddd0a1e..272e531 100644 --- a/main.nf +++ b/main.nf @@ -10,9 +10,9 @@ include { run_validate_PipeVal_with_metadata } from './external/pipeline-Nextflo ] ) -include { run_liftover_BCFtools } from './module/liftover.nf' -include { workflow_apply_snv_annotations } from './module/snv_annotations.nf' -include { workflow_extract_features} from './module/extract_features.nf' +include { workflow_extract_sv_annotations } from './module/sv_workflow.nf' +include { workflow_extract_snv_annotations } from './module/snv_workflow.nf' +include { workflow_predict_stability } from './module/predict_stability.nf' // Log info here log.info """\ @@ -138,26 +138,44 @@ workflow { .map { filename, metadata -> [metadata[0].sample_id, metadata[0] + [(metadata[1]): filename]] } .groupTuple() .map { it[1].inject([:]) { result, i -> result + i } } - .set { validated_vcf_with_index } + .tap { validated_vcf_with_index } + .map { [it.sample_id, it.vcf, it.index] } + .set { 
validated_vcf_tuple } // The values of validated_vcf_with_index are maps with keys vcf, index, and sample_id. + // The values of validated_vcf_tuple are tuples of (sample_id, vcf, index). + + if (params.variant_caller == "Delly2") { + // Take the SV branch + workflow_extract_sv_annotations( + validated_vcf_tuple, + Channel.value(params.header_contigs), + Channel.value(params.gnomad_rds), + Channel.value(params.chain_file), + Channel.value(params.variant_caller) + ) - // Step 1: Liftover - run_liftover_BCFtools( - validated_vcf_with_index.map { [it.sample_id, it.vcf, it.index] }, - input_ch_src_sequence, - input_ch_dest_sequence, - Channel.value(params.chain_file) - ) + workflow_extract_sv_annotations.out.r_annotations.set { liftover_vcf } + workflow_extract_sv_annotations.out.r_annotations.set { r_annotations } + + } else { + // Take the SNV branch + workflow_extract_snv_annotations( + validated_vcf_tuple, + input_ch_src_sequence, + input_ch_dest_sequence, + Channel.value(params.chain_file), + Channel.value(params.variant_caller) + ) - // Step 2: Annotate - workflow_apply_snv_annotations( - run_liftover_BCFtools.out.liftover_vcf_with_index, - input_ch_dest_sequence - ) + workflow_extract_snv_annotations.out.liftover_vcf.set { liftover_vcf } + workflow_extract_snv_annotations.out.r_annotations.set { r_annotations } + } - // Step 3: Analyze - workflow_extract_features( - workflow_apply_snv_annotations.out.annotated_vcf + // Predict stability and annotate the lifted VCF + workflow_predict_stability( + liftover_vcf, + r_annotations, + Channel.value(params.rf_model) ) } diff --git a/module/extract_features.nf b/module/predict_stability.nf similarity index 89% rename from module/extract_features.nf rename to module/predict_stability.nf index f7b22bc..b66118b 100644 --- a/module/extract_features.nf +++ b/module/predict_stability.nf @@ -1,4 +1,4 @@ -include { compress_and_index_HTSlib } from './snv_annotations.nf' +include { compress_and_index_HTSlib } from './utils.nf' process 
extract_VCF_features_StableLift { container params.docker_image_stablelift @@ -109,21 +109,17 @@ process run_apply_stability_annotations { """ } -workflow workflow_extract_features { +workflow workflow_predict_stability { take: vcf_with_sample_id + r_annotations + rf_model main: - if (params.variant_caller == "HaplotypeCaller") { - error "HaplotypeCaller is not supported yet" - } else { - extract_VCF_features_StableLift(vcf_with_sample_id) - extract_VCF_features_StableLift.out.r_annotations.set { ch_annotations } - } predict_stability_StableLift( - ch_annotations, - Channel.value(params.rf_model) + r_annotations, + rf_model ) compress_and_index_HTSlib( diff --git a/module/scripts/liftover-Delly2-vcf.R b/module/scripts/liftover-Delly2-vcf.R new file mode 100644 index 0000000..827a3b1 --- /dev/null +++ b/module/scripts/liftover-Delly2-vcf.R @@ -0,0 +1,157 @@ +#!/usr/bin/env Rscript +# liftover-Delly2-vcf.R +################################################################################################### +# +# +# +################################################################################################### + +suppressPackageStartupMessages({ + library(vcfR); + library(data.table); + library(argparse); + library(rtracklayer); + }); + +################################################################################################### +# Input +################################################################################################### +# Define command line arguments +parser <- ArgumentParser(); +parser$add_argument('--input-vcf', type = 'character', help = 'GRCh37 Delly2 vcf'); +parser$add_argument('--header-contigs', type = 'character', help = 'Directory with vcf subsets'); +parser$add_argument('--chain-file', type = 'character', help = 'hg19ToHg38.over.chain file'); +parser$add_argument('--output', type = 'character', help = 'Where to write lifted vcf'); +args <- parser$parse_args(); + +# Save command line arguments +for (arg in names(args)) { + 
assign(gsub('_', '.', arg), args[[arg]]); + } + +# Set parameters for interactive runs +if (interactive()) { + # input.vcf <- '/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/gSV/bcftools-merge/CPCG-40QC_GRCh37/CPCG-40QC_GRCh37_regenotype-gSV_delly_bcftools-merge_delly-filter-germline.vcf.gz'; + input.vcf <- '/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/sSV/bcftools-merge/CPCG-40QC_GRCh37/CPCG-40QC_GRCh37_call-sSV_delly_bcftools-merge_somatic-only.vcf.gz'; + header.contigs <- '/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/GRCh38-vcf-header-contigs.txt'; + chain.file <- '/hot/resource/genomics/liftover_chain_files/hg19ToHg38.over.chain'; + output <- '/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/sSV/stableLift/train_CPCG-40QC_Delly2/CPCG-40QC_Delly2_LiftOver-GRCh38.vcf.gz'; + } + +################################################################################################### +# Functions +################################################################################################### +vcf.fix.to.dt <- function(vcf.fix) { + vcf.fix <- as.data.table(vcf.fix); + vcf.info <- vcf.info.to.dt(vcf.fix$INFO); + cbind(vcf.fix[, -'INFO'], vcf.info); + } + +# vcfR::getINFO() to data.table +vcf.info.to.dt <- function(vcf.info) { + vcf.info <- lapply(vcf.info, function(x) vcf.info.string.to.list(x)); + feature.names <- unique(unlist(lapply(vcf.info, names))); + vcf.info <- do.call(mapply, c(FUN = list, lapply(vcf.info, `[`, feature.names))); + setNames(as.data.table(vcf.info), feature.names); + } + +# Split vcf info field to list +vcf.info.string.to.list <- function(vcf.info, keep.columns = NULL) { + list.out <- strsplit(vcf.info, split = ';'); + list.out <- lapply(list.out, function(x) strsplit(x, split = '=')); + labels <- sapply(list.out[[1]], function(x) x[[1]]); + values <- sapply(list.out[[1]], function(x) if (length(x) == 2) x[[2]] else x[[1]]); + 
names(values) <- labels; + if (is.null(keep.columns)) return(values); + values <- values[labels %in% keep.columns]; + return(values); + } + +################################################################################################### +# Load files +################################################################################################### +input.vcf.path <- input.vcf; +input.vcf <- read.vcfR(input.vcf); +header.contigs <- scan(header.contigs, character()); +liftover.chain <- import.chain(chain.file); + +################################################################################################### +# Data preprocessing +################################################################################################### +if (any(duplicated(input.vcf@fix[, 'ID']))) input.vcf@fix[, 'ID'] <- paste0(substr(input.vcf@fix[, 'ID'], 1, 3), sprintf('%08d', seq_len(nrow(input.vcf@fix)))); +# if (any(duplicated(input.vcf@fix[, 'ID']))) fix.dt[, ID := paste0(substr(ID, 1, 3), sprintf('%08d', seq_len(nrow(fix.dt))))]; +fix.dt <- as.data.table(input.vcf@fix); +gt.dt <- as.data.table(input.vcf@gt); + +fix.dt <- vcf.fix.to.dt(fix.dt); +fix.dt[, CHROM := paste0('chr', CHROM)]; +fix.dt[, CHR2 := ifelse(is.na(CHR2), CHR2, paste0('chr', CHR2))]; + +fix.dt <- fix.dt[, -c('CONSENSUS')]; +numeric.columns <- c('POS', 'QUAL', 'END', 'PE', 'MAPQ', 'SRMAPQ', 'INSLEN', 'HOMLEN', 'SR', 'SRQ', 'CE', 'SVLEN', 'POS2'); +character.columns <- names(fix.dt)[!names(fix.dt) %in% numeric.columns]; +fix.dt[, (numeric.columns) := lapply(.SD, as.numeric), .SDcols = numeric.columns]; +fix.dt[, (character.columns) := lapply(.SD, as.character), .SDcols = character.columns]; + +################################################################################################### +# Liftover +################################################################################################### +# Create GRanges object +granges.37 <- makeGRangesFromDataFrame( + df = fix.dt, + seqnames.field = 
'CHROM', + start.field = 'POS', + end.field = 'END', + keep.extra.columns = TRUE + ); + +# Liftover using chain file +granges.38 <- unlist(liftOver(granges.37, liftover.chain)); +granges.38.dt <- as.data.table(granges.38); + +# Create GRanges object using CHROM, CHR2, and POS2 from fix.dt +granges.37.BND <- makeGRangesFromDataFrame( + df = fix.dt[SVTYPE == 'BND', ], + seqnames.field = 'CHR2', + start.field = 'POS2', + end.field = 'POS2', + keep.extra.columns = TRUE + ); +granges.38.BND <- as.data.table(unlist(liftOver(granges.37.BND, liftover.chain))); + +# Remove multiple mappings +granges.38.dt <- granges.38.dt[!duplicated(ID)]; +granges.38.BND <- granges.38.BND[!duplicated(ID)]; +common <- intersect(granges.38.dt$ID, granges.38.BND$ID); + +granges.38.dt[ID %in% common, c('CHR2', 'POS2') := granges.38.BND[ID %in% common, .(seqnames, start)]]; + +pass.liftover <- as.data.table(input.vcf@fix)$ID %in% granges.38.dt$ID; +fix.lifted <- as.data.table(input.vcf@fix)[pass.liftover]; +gt.dt <- gt.dt[pass.liftover]; +for (i in seq_len(nrow(fix.lifted))) { + this.ID <- fix.lifted[i, ID]; + this.INFO <- vcf.info.string.to.list(fix.lifted[i, INFO]); + this.INFO[['END']] <- granges.38.dt[i, end]; + if (this.INFO[['SVTYPE']] == 'BND') { + this.INFO[['CHR2']] <- granges.38.dt[i, CHR2]; + this.INFO[['POS2']] <- granges.38.dt[i, POS2]; + } + this.INFO <- lapply(names(this.INFO), function(x) paste(x, this.INFO[[x]], sep = '=')); + this.INFO <- paste(this.INFO, collapse = ';'); + this.INFO <- gsub('IMPRECISE=IMPRECISE', 'IMPRECISE', this.INFO); + this.INFO <- gsub('PRECISE=PRECISE', 'PRECISE', this.INFO); + this.INFO <- gsub('SOMATIC=SOMATIC', 'SOMATIC', this.INFO); + fix.lifted[i, c('CHROM', 'POS', 'INFO') := granges.38.dt[ID == ..this.ID, .(seqnames, start, ..this.INFO)]]; + } + +################################################################################################### +# Write output vcf 
+################################################################################################### +output.vcf <- input.vcf; +output.vcf@fix <- as.matrix(fix.lifted); +output.vcf@gt <- as.matrix(gt.dt); +output.vcf@meta <- output.vcf@meta[!grepl("^##(contig|reference)", output.vcf@meta)]; +output.vcf@meta <- c(output.vcf@meta, header.contigs); + +write.vcf(output.vcf, output); diff --git a/module/snv_annotations.nf b/module/snv_annotations.nf index 705855f..e0bcc25 100644 --- a/module/snv_annotations.nf +++ b/module/snv_annotations.nf @@ -1,3 +1,5 @@ +include { compress_and_index_HTSlib } from './utils.nf' + process run_Funcotator_GATK { container params.docker_image_gatk @@ -112,37 +114,6 @@ process extract_TrinucleotideContext_BEDTools { """ } -process compress_and_index_HTSlib { - container params.docker_image_samtools - - publishDir path: "${intermediate_filepath}", - pattern: "output.tsv.gz{,.tbi}", - mode: "copy", - enabled: params.save_intermediate_files - - input: - tuple val(sample_id), path(tsv) - - output: - tuple val(sample_id), path('output.tsv.gz'), path('output.tsv.gz.tbi'), emit: compressed_tsv_with_index - - script: - intermediate_filepath = "${params.output_dir_base}/SAMtools-${params.samtools_version}/intermediate/${task.process}" - - slug = "Trinucleotide-${sample_id}" - - """ - bgzip ${tsv} --output output.tsv.gz - - tabix \ - --sequence 1 \ - --begin 2 \ - --end 2 \ - output.tsv.gz - """ -} - - process annotate_trinucleotide_BCFtools { container params.docker_image_bcftools diff --git a/module/liftover.nf b/module/snv_workflow.nf similarity index 52% rename from module/liftover.nf rename to module/snv_workflow.nf index 5e8f09c..3bb549f 100644 --- a/module/liftover.nf +++ b/module/snv_workflow.nf @@ -1,12 +1,4 @@ -/* -* Module/process description here -* -* @input -* @params -* @output -*/ - -// include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' +include { 
workflow_apply_snv_annotations } from './module/snv_annotations.nf' process run_liftover_BCFtools { container params.docker_image_bcftools @@ -63,3 +55,68 @@ process run_liftover_BCFtools { --output "liftover.vcf.gz" """ } + +process extract_VCF_features_StableLift { + container params.docker_image_stablelift + containerOptions "-v ${moduleDir}:${moduleDir}" + + publishDir path: "${intermediate_filepath}", + pattern: "features.Rds", + mode: "copy", + enabled: params.save_intermediate_files, + saveAs: { "${slug}.${file(it).getExtension()}" } + + input: + tuple val(sample_id), path(vcf) + + output: + tuple val(sample_id), path('features.Rds'), emit: r_annotations + + script: + intermediate_filepath = "${params.output_dir_base}/stablelift-${params.stablelift_version}/intermediate/${task.process}" + + slug = "stablelift-${sample_id}" + + """ + Rscript "${moduleDir}/scripts/extract-vcf-features.R" \ + --input-vcf "${vcf}" \ + --variant-caller ${params.variant_caller} \ + --output-rds "features.Rds" + """ +} + +workflow workflow_extract_snv_annotations { + take: + vcf_with_sample_id + src_sequence + dest_sequence + chain_file + variant_caller + + main: + + // Step 1: Liftover + run_liftover_BCFtools( + vcf_with_sample_id, + src_sequence, + dest_sequence, + chain_file + ) + + // Step 2: Annotate + workflow_apply_snv_annotations( + run_liftover_BCFtools.out.liftover_vcf_with_index, + dest_sequence + ) + + // Step 3: Extract features + // FIXME Parallelize HaplotypeCaller + extract_VCF_features_StableLift( + workflow_apply_snv_annotations.out.annotated_vcf + ) + + emit: + liftover_vcf = workflow_apply_snv_annotations.out.annotated_vcf + r_annotations = extract_VCF_features_StableLift.out.r_annotations +} + diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index 7fcd089..f33220c 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -75,12 +75,14 @@ workflow workflow_extract_sv_annotations { main: + // Step 1: Liftover run_sv_liftover( vcf_with_sample_id, 
header_contigs, chain_file ) + // Step 2: Extract features run_intersect_gnomad( run_sv_liftover.out.liftover_vcf, gnomad_rds, @@ -88,5 +90,6 @@ workflow workflow_extract_sv_annotations { ) emit: + liftover_vcf = run_sv_liftover.out.liftover_vcf r_annotations = run_intersect_gnomad.out.r_annotations } diff --git a/module/utils.nf b/module/utils.nf new file mode 100644 index 0000000..c00cc90 --- /dev/null +++ b/module/utils.nf @@ -0,0 +1,30 @@ + +process compress_and_index_HTSlib { + container params.docker_image_samtools + + publishDir path: "${intermediate_filepath}", + pattern: "output.tsv.gz{,.tbi}", + mode: "copy", + enabled: params.save_intermediate_files + + input: + tuple val(sample_id), path(tsv) + + output: + tuple val(sample_id), path('output.tsv.gz'), path('output.tsv.gz.tbi'), emit: compressed_tsv_with_index + + script: + intermediate_filepath = "${params.output_dir_base}/SAMtools-${params.samtools_version}/intermediate/${task.process}" + + slug = "${sample_id}" + + """ + bgzip ${tsv} --output output.tsv.gz + + tabix \ + --sequence 1 \ + --begin 2 \ + --end 2 \ + output.tsv.gz + """ +} From 0e499b34247c59ec56ebe843ae104e57856423bc Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 13:28:08 -0700 Subject: [PATCH 08/42] Add stubs to all processes --- module/predict_stability.nf | 18 ++++++++++++++++++ module/snv_annotations.nf | 21 +++++++++++++++++++++ module/snv_workflow.nf | 11 +++++++++++ module/sv_workflow.nf | 10 ++++++++++ module/utils.nf | 6 ++++++ 5 files changed, 66 insertions(+) diff --git a/module/predict_stability.nf b/module/predict_stability.nf index b66118b..f1dd7e1 100644 --- a/module/predict_stability.nf +++ b/module/predict_stability.nf @@ -27,6 +27,11 @@ process extract_VCF_features_StableLift { --variant-caller ${params.variant_caller} \ --output-rds "features.Rds" """ + + stub: + """ + touch features.Rds + """ } process predict_stability_StableLift { @@ -54,6 +59,11 @@ process predict_stability_StableLift { 
--variant-caller "${params.variant_caller}" \ --output-tsv "${output_file_name}" """ + + stub: + """ + touch "${output_file_name}" + """ } process run_apply_stability_annotations { @@ -107,6 +117,14 @@ process run_apply_stability_annotations { bcftools index -t "${filtered_vcf}" """ + + stub: + """ + touch "${stability_vcf}" + touch "${stability_vcf_tbi}" + touch "${filtered_vcf}" + touch "${filtered_vcf_tbi}" + """ } workflow workflow_predict_stability { diff --git a/module/snv_annotations.nf b/module/snv_annotations.nf index e0bcc25..0e8fcb2 100644 --- a/module/snv_annotations.nf +++ b/module/snv_annotations.nf @@ -33,6 +33,11 @@ process run_Funcotator_GATK { --output-file-format VCF \ --output "output.vcf.gz" """ + + stub: + """ + touch "output.vcf.gz" + """ } process annotate_RepeatMasker_BCFtools { @@ -64,6 +69,11 @@ process annotate_RepeatMasker_BCFtools { -o "output.vcf.gz" \ "${vcf}" """ + + stub: + """ + touch "output.vcf.gz" + """ } process extract_TrinucleotideContext_BEDTools { @@ -112,6 +122,12 @@ process extract_TrinucleotideContext_BEDTools { <(cut -f 2 "partial.tsv") \ > "output.tsv" """ + + stub: + """ + touch "output.tsv" + touch "output.bed" + """ } process annotate_trinucleotide_BCFtools { @@ -145,6 +161,11 @@ process annotate_trinucleotide_BCFtools { --output output.vcf.gz \ ${vcf} """ + + stub: + """ + touch "output.vcf.gz" + """ } workflow workflow_apply_snv_annotations { diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index 3bb549f..618957c 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -54,6 +54,12 @@ process run_liftover_BCFtools { --write-index=tbi \ --output "liftover.vcf.gz" """ + + stub: + """ + touch "liftover.vcf.gz" + touch "liftover.vcf.gz.tbi" + """ } process extract_VCF_features_StableLift { @@ -83,6 +89,11 @@ process extract_VCF_features_StableLift { --variant-caller ${params.variant_caller} \ --output-rds "features.Rds" """ + + stub: + """ + touch "features.Rds" + """ } workflow 
workflow_extract_snv_annotations { diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index f33220c..f4b6ba7 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -31,6 +31,11 @@ process run_sv_liftover{ --chain-file ${chain_file} \ --output "liftover.vcf.gz" """ + + stub: + """ + touch "liftover.vcf.gz" + """ } process run_intersect_gnomad { @@ -63,6 +68,11 @@ process run_intersect_gnomad { --output-rds "annotations.Rds" \ --gnomad-rds ${gnomad_rds} """ + + stub: + """ + touch "annotations.Rds" + """ } workflow workflow_extract_sv_annotations { diff --git a/module/utils.nf b/module/utils.nf index c00cc90..2f4f27a 100644 --- a/module/utils.nf +++ b/module/utils.nf @@ -27,4 +27,10 @@ process compress_and_index_HTSlib { --end 2 \ output.tsv.gz """ + + stub: + """ + touch "output.tsv.gz" + touch "output.tsv.gz.tbi" + """ } From 1f18dfe29fc3a6c149c7e8f35f5bed98b2488b14 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:39:35 -0700 Subject: [PATCH 09/42] Bugfix, need leading params --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 272e531..d4a2093 100644 --- a/main.nf +++ b/main.nf @@ -152,7 +152,7 @@ workflow { Channel.value(header_contigs), Channel.value(gnomad_rds), Channel.value(chain_file), - Channel.value(variant_caller) + Channel.value(params.variant_caller) ) workflow_extract_sv_annotations.out.r_annotations.set { liftover_vcf } @@ -165,7 +165,7 @@ workflow { input_ch_src_sequence, input_ch_dest_sequence, Channel.value(params.chain_file), - Channel.value(variant_caller) + Channel.value(params.variant_caller) ) workflow_extract_snv_annotations.out.r_annotations.set { liftover_vcf } From 83cd1e73acbb5d5b589a787d4c5c568cb0ec9fe0 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:40:14 -0700 Subject: [PATCH 10/42] Bugfix, remove module/ from relative path --- module/snv_workflow.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/module/snv_workflow.nf b/module/snv_workflow.nf index 618957c..d00b972 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -1,4 +1,4 @@ -include { workflow_apply_snv_annotations } from './module/snv_annotations.nf' +include { workflow_apply_snv_annotations } from './snv_annotations.nf' process run_liftover_BCFtools { container params.docker_image_bcftools From 966788baefcf9fc25907f904f7a4f38d16b24d63 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:42:06 -0700 Subject: [PATCH 11/42] Remove redundant process --- module/predict_stability.nf | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/module/predict_stability.nf b/module/predict_stability.nf index f1dd7e1..5542ec5 100644 --- a/module/predict_stability.nf +++ b/module/predict_stability.nf @@ -1,39 +1,5 @@ include { compress_and_index_HTSlib } from './utils.nf' -process extract_VCF_features_StableLift { - container params.docker_image_stablelift - containerOptions "-v ${moduleDir}:${moduleDir}" - - publishDir path: "${intermediate_filepath}", - pattern: "features.Rds", - mode: "copy", - enabled: params.save_intermediate_files, - saveAs: { "${slug}.${file(it).getExtension()}" } - - input: - tuple val(sample_id), path(vcf) - - output: - tuple val(sample_id), path('features.Rds'), emit: r_annotations - - script: - intermediate_filepath = "${params.output_dir_base}/stablelift-${params.stablelift_version}/intermediate/${task.process}" - - slug = "stablelift-${sample_id}" - - """ - Rscript "${moduleDir}/scripts/extract-vcf-features.R" \ - --input-vcf "${vcf}" \ - --variant-caller ${params.variant_caller} \ - --output-rds "features.Rds" - """ - - stub: - """ - touch features.Rds - """ -} - process predict_stability_StableLift { container params.docker_image_stablelift containerOptions "-v ${moduleDir}:${moduleDir}" From 645f154a24fccb3c166ec5ee2b58f586b5daf3fe Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:43:42 -0700 
Subject: [PATCH 12/42] Bugfix, clean up an undefined stub variable --- main.nf | 3 ++- module/predict_stability.nf | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index d4a2093..f01cfb8 100644 --- a/main.nf +++ b/main.nf @@ -176,6 +176,7 @@ workflow { workflow_predict_stability( liftover_vcf, r_annotations, - Channel.value(params.rf_model) + Channel.value(params.rf_model), + Channel.value(params.variant_caller) ) } diff --git a/module/predict_stability.nf b/module/predict_stability.nf index 5542ec5..2ef6345 100644 --- a/module/predict_stability.nf +++ b/module/predict_stability.nf @@ -5,30 +5,30 @@ process predict_stability_StableLift { containerOptions "-v ${moduleDir}:${moduleDir}" publishDir path: "${params.output_dir_base}/output", - pattern: "${output_file_name}", - mode: "copy" + pattern: "stability.tsv", + mode: "copy", + saveAs: { "StableLift-${sample_id}-${variant_caller}.tsv" } input: tuple val(sample_id), path(features_rds) path(rf_model) + val(variant_caller) output: - tuple val(sample_id), path(output_file_name), emit: stability_tsv + tuple val(sample_id), path("stability.tsv"), emit: stability_tsv script: - output_file_name = "stablelift-${sample_id}.tsv" - """ Rscript "${moduleDir}/scripts/predict-liftover-stability.R" \ --features-dt "${features_rds}" \ --rf-model "${rf_model}" \ - --variant-caller "${params.variant_caller}" \ - --output-tsv "${output_file_name}" + --variant-caller "${variant_caller}" \ + --output-tsv "stability.tsv" """ stub: """ - touch "${output_file_name}" + touch "stability.tsv" """ } @@ -98,12 +98,14 @@ workflow workflow_predict_stability { vcf_with_sample_id r_annotations rf_model + variant_caller main: predict_stability_StableLift( r_annotations, - rf_model + rf_model, + variant_caller ) compress_and_index_HTSlib( From 88ae361ca00227bc6941a38a825e7b0e6c326896 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:46:05 -0700 Subject: [PATCH 13/42] 
Bugfix, clean up more undefined stub variables --- module/predict_stability.nf | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/module/predict_stability.nf b/module/predict_stability.nf index 2ef6345..1ae6987 100644 --- a/module/predict_stability.nf +++ b/module/predict_stability.nf @@ -36,8 +36,9 @@ process run_apply_stability_annotations { container params.docker_image_bcftools publishDir path: "${params.output_dir_base}/output", - pattern: "*.vcf.gz{,.tbi}", - mode: "copy" + pattern: "{stability,filtered}.vcf.gz{,.tbi}", + mode: "copy", + saveAs: { "${sample_id}-${it}" } input: tuple val(sample_id), @@ -48,21 +49,19 @@ process run_apply_stability_annotations { output: tuple val(sample_id), - path(stability_vcf), - path(stability_vcf_tbi), + path("stability.vcf.gz"), + path("stability.vcf.gz.tbi"), emit: stability_vcf_with_index tuple val(sample_id), - path(filtered_vcf), - path(filtered_vcf_tbi), + path("filtered.vcf.gz"), + path("filtered.vcf.gz.tbi"), emit: filtered_vcf_with_index script: - slug = "${sample_id}_LiftOver" - - stability_vcf = "${slug}_stability.vcf.gz" + stability_vcf = "stability.vcf.gz" stability_vcf_tbi = "${stability_vcf}.tbi" - filtered_vcf = "${slug}_filtered.vcf.gz" + filtered_vcf = "filtered.vcf.gz" filtered_vcf_tbi = "${filtered_vcf}.tbi" """ @@ -86,10 +85,10 @@ process run_apply_stability_annotations { stub: """ - touch "${stability_vcf}" - touch "${stability_vcf_tbi}" - touch "${filtered_vcf}" - touch "${filtered_vcf_tbi}" + touch "stability.vcf.gz" + touch "stability.vcf.gz.tbi" + touch "filtered.vcf.gz" + touch "filtered.vcf.gz.tbi" """ } From 04e7f2670425df83b24a31dd082eaf0cf449e9c0 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:48:22 -0700 Subject: [PATCH 14/42] Get rid of variables in utils module --- module/utils.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/module/utils.nf b/module/utils.nf index 2f4f27a..0967698 
100644 --- a/module/utils.nf +++ b/module/utils.nf @@ -2,22 +2,22 @@ process compress_and_index_HTSlib { container params.docker_image_samtools - publishDir path: "${intermediate_filepath}", + publishDir path: "${params.output_dir_base}/SAMtools-${params.samtools_version}/intermediate/${task.process}", pattern: "output.tsv.gz{,.tbi}", mode: "copy", - enabled: params.save_intermediate_files + enabled: params.save_intermediate_files, + saveAs: { "${sample_id}${file(it).getName().replace(file(it).getSimpleName(), "")}" } input: tuple val(sample_id), path(tsv) output: - tuple val(sample_id), path('output.tsv.gz'), path('output.tsv.gz.tbi'), emit: compressed_tsv_with_index + tuple val(sample_id), + path('output.tsv.gz'), + path('output.tsv.gz.tbi'), + emit: compressed_tsv_with_index script: - intermediate_filepath = "${params.output_dir_base}/SAMtools-${params.samtools_version}/intermediate/${task.process}" - - slug = "${sample_id}" - """ bgzip ${tsv} --output output.tsv.gz From dfcb7037b8a38bb3296347ce44b5c701d9493879 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:52:44 -0700 Subject: [PATCH 15/42] Clean up variables in sv_workflow.nf --- module/sv_workflow.nf | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index f4b6ba7..f09996a 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -2,11 +2,11 @@ process run_sv_liftover{ container params.docker_image_stablelift - publishDir path: "${intermediate_filepath}", + publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process}", pattern: "liftover.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, - saveAs: { "${slug}.vcf.gz" } + saveAs: { "LiftOver-${sample_id}.vcf.gz" } input: tuple val(sample_id), @@ -19,16 +19,11 @@ process run_sv_liftover{ tuple val(sample_id), path('liftover.vcf.gz'), emit: liftover_vcf script: - // FIXME Use a more 
standard path - intermediate_path = "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process}" - - slug = "LiftOver-${sample_id}" - """ Rscript "${moduleDir}/scripts/liftover-Delly2-vcf.R \ - --input-vcf ${vcf} \ - --header-contigs ${header_contigs} \ - --chain-file ${chain_file} \ + --input-vcf "${vcf}" \ + --header-contigs "${header_contigs}" \ + --chain-file "${chain_file}" \ --output "liftover.vcf.gz" """ @@ -41,11 +36,11 @@ process run_sv_liftover{ process run_intersect_gnomad { container params.docker_image_stablelift - publishDir path: "${intermediate_filepath}", + publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process}", pattern: "annotations.Rds", mode: "copy", enabled: params.save_intermediate_files, - saveAs: { "${slug}.Rds" } + saveAs: { "LiftOver-${sample_id}-${variant_caller}.Rds" } input: tuple val(sample_id), path(vcf, stageAs: 'inputs/*') @@ -56,11 +51,6 @@ process run_intersect_gnomad { tuple val(sample_id), path('annotations.Rds'), emit: r_annotations script: - // FIXME Use a more standard path - intermediate_path = "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process}" - - slug = "LiftOver-${sample_id}-${variant_caller}" - """ Rscript ${moduleDir}/scripts/publish/extract-vcf-features-SV.R \ --variant-caller "${variant_caller}" \ From 257eb74ca244a67c7e63d6fe0e52d73649ce834e Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 15:56:13 -0700 Subject: [PATCH 16/42] Clean up variables in snv_workflow.nf --- module/snv_workflow.nf | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index d00b972..b46fc90 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -3,23 +3,11 @@ include { workflow_apply_snv_annotations } from './snv_annotations.nf' process run_liftover_BCFtools { container 
params.docker_image_bcftools - publishDir path: "${intermediate_path}", - pattern: "reject.vcf.gz", + publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}", + pattern: "{reject,liftover}.vcf.gz{,.tbi}", mode: "copy", enabled: params.save_intermediate_files, - saveAs: { "${slug}-reject.vcf.gz" } - - publishDir path: "${intermediate_path}", - pattern: "liftover.vcf.gz", - mode: "copy", - enabled: params.save_intermediate_files, - saveAs: { "${slug}.vcf.gz" } - - publishDir path: "${intermediate_path}", - pattern: "liftover.vcf.gz.tbi", - mode: "copy", - enabled: params.save_intermediate_files, - saveAs: { "${slug}.vcf.gz.tbi" } + saveAs: { filename -> "LiftOver-${sample_id}-${src_fasta_id}-to-${dest_fasta_id}-${filename}" } input: tuple val(sample_id), path(vcf), path(index) @@ -31,11 +19,6 @@ process run_liftover_BCFtools { tuple val(sample_id), path('liftover.vcf.gz'), path('liftover.vcf.gz.tbi'), emit: liftover_vcf_with_index script: - // FIXME Use a more standard path - intermediate_path = "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}" - - slug = "LiftOver-${sample_id}-${src_fasta_id}-to-${dest_fasta_id}" - """ bcftools +liftover \ --output-type u \ @@ -59,6 +42,7 @@ process run_liftover_BCFtools { """ touch "liftover.vcf.gz" touch "liftover.vcf.gz.tbi" + touch "reject.vcf.gz" """ } @@ -66,11 +50,11 @@ process extract_VCF_features_StableLift { container params.docker_image_stablelift containerOptions "-v ${moduleDir}:${moduleDir}" - publishDir path: "${intermediate_filepath}", + publishDir path: "${params.output_dir_base}/StableLift-${params.stablelift_version}/intermediate/${task.process}", pattern: "features.Rds", mode: "copy", enabled: params.save_intermediate_files, - saveAs: { "${slug}.${file(it).getExtension()}" } + saveAs: { "StableLift-${sample_id}.Rds" } input: tuple val(sample_id), path(vcf) @@ -79,10 +63,6 @@ process extract_VCF_features_StableLift { 
tuple val(sample_id), path('features.Rds'), emit: r_annotations script: - intermediate_filepath = "${params.output_dir_base}/stablelift-${params.stablelift_version}/intermediate/${task.process}" - - slug = "stablelift-${sample_id}" - """ Rscript "${moduleDir}/scripts/extract-vcf-features.R" \ --input-vcf "${vcf}" \ From 01d3df7217a47d25750c62c9a2b783777120a974 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 16:00:45 -0700 Subject: [PATCH 17/42] Clean up variables in snv_annotations.nf --- module/snv_annotations.nf | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/module/snv_annotations.nf b/module/snv_annotations.nf index 0e8fcb2..9983950 100644 --- a/module/snv_annotations.nf +++ b/module/snv_annotations.nf @@ -3,11 +3,11 @@ include { compress_and_index_HTSlib } from './utils.nf' process run_Funcotator_GATK { container params.docker_image_gatk - publishDir path: "${intermediate_filepath}", + publishDir path: "${params.output_dir_base}/GATK-${params.gatk_version}/intermediate/${task.process}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, - saveAs: { "${slug}.vcf.gz" } + saveAs: { "Funcotator-${sample_id}-${dest_fasta_id}.vcf.gz" } input: tuple val(sample_id), @@ -20,10 +20,6 @@ process run_Funcotator_GATK { tuple val(sample_id), path('output.vcf.gz'), emit: funcotator_vcf script: - intermediate_filepath = "${params.output_dir_base}/GATK-${params.gatk_version}/intermediate/${task.process}" - - slug = "Funcotator-${sample_id}-${dest_fasta_id}" - """ gatk Funcotator \ --variant "${vcf}" \ @@ -43,11 +39,11 @@ process run_Funcotator_GATK { process annotate_RepeatMasker_BCFtools { container params.docker_image_bcftools - publishDir path: "${intermediate_filepath}", + publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, - 
saveAs: { "${slug}.vcf.gz" } + saveAs: { "RepeatMasker-${sample_id}.vcf.gz" } input: tuple val(sample_id), path(vcf, stageAs: 'inputs/*') @@ -57,9 +53,6 @@ process annotate_RepeatMasker_BCFtools { tuple val(sample_id), path('output.vcf.gz'), emit: repeatmasker_vcf script: - intermediate_filepath = "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}" - - slug = "RepeatMasker-${sample_id}" """ bcftools annotate \ @@ -79,17 +72,11 @@ process annotate_RepeatMasker_BCFtools { process extract_TrinucleotideContext_BEDTools { container params.docker_image_bedtools - publishDir path: "${intermediate_filepath}", - pattern: "output.bed", - mode: "copy", - enabled: params.save_intermediate_files, - saveAs: { "${slug}.bed" } - - publishDir path: "${intermediate_filepath}", - pattern: "output.tsv", + publishDir path: "${params.output_dir_base}/BEDtools-${params.bedtools_version}/intermediate/${task.process}", + pattern: "output.{bed,tsv}", mode: "copy", enabled: params.save_intermediate_files, - saveAs: { "${slug}-full.tsv" } + saveAs: { "Trinucleotide-${sample_id}-${dest_fasta_id}.${file(it).getExtension()}" } input: tuple val(sample_id), path(vcf) @@ -100,10 +87,6 @@ process extract_TrinucleotideContext_BEDTools { tuple val(sample_id), path('output.bed'), emit: trinucleotide_bed script: - intermediate_filepath = "${params.output_dir_base}/BEDtools-${params.bedtools_version}/intermediate/${task.process}" - - slug = "Trinucleotide-${sample_id}" - """ zcat ${vcf} \ | grep -v "^#" \ @@ -133,11 +116,11 @@ process extract_TrinucleotideContext_BEDTools { process annotate_trinucleotide_BCFtools { container params.docker_image_bcftools - publishDir path: "${intermediate_filepath}", + publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, - saveAs: { "${slug}.vcf.gz" } + saveAs: { 
"Trinucleotide-annotated-${sample_id}.vcf.gz" } input: tuple val(sample_id), @@ -149,10 +132,6 @@ process annotate_trinucleotide_BCFtools { tuple val(sample_id), path('output.vcf.gz'), emit: trinucleotide_vcf script: - intermediate_filepath = "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}" - - slug = "Trinucleotide-${sample_id}" - """ bcftools annotate \ --annotations ${tsv} \ From 3359f08fdc732eaf997230cc06c55fa2edec6d18 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 16:07:09 -0700 Subject: [PATCH 18/42] Replace colons with slashes --- module/snv_annotations.nf | 8 ++++---- module/snv_workflow.nf | 4 ++-- module/sv_workflow.nf | 4 ++-- module/utils.nf | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/module/snv_annotations.nf b/module/snv_annotations.nf index 9983950..9e6af47 100644 --- a/module/snv_annotations.nf +++ b/module/snv_annotations.nf @@ -3,7 +3,7 @@ include { compress_and_index_HTSlib } from './utils.nf' process run_Funcotator_GATK { container params.docker_image_gatk - publishDir path: "${params.output_dir_base}/GATK-${params.gatk_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/GATK-${params.gatk_version}/intermediate/${task.process.replace(':', '/')}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, @@ -39,7 +39,7 @@ process run_Funcotator_GATK { process annotate_RepeatMasker_BCFtools { container params.docker_image_bcftools - publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process.replace(':', '/')}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, @@ -72,7 +72,7 @@ process annotate_RepeatMasker_BCFtools { process extract_TrinucleotideContext_BEDTools { container params.docker_image_bedtools - 
publishDir path: "${params.output_dir_base}/BEDtools-${params.bedtools_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/BEDtools-${params.bedtools_version}/intermediate/${task.process.replace(':', '/')}", pattern: "output.{bed,tsv}", mode: "copy", enabled: params.save_intermediate_files, @@ -116,7 +116,7 @@ process extract_TrinucleotideContext_BEDTools { process annotate_trinucleotide_BCFtools { container params.docker_image_bcftools - publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process.replace(':', '/')}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index b46fc90..4a445aa 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -3,7 +3,7 @@ include { workflow_apply_snv_annotations } from './snv_annotations.nf' process run_liftover_BCFtools { container params.docker_image_bcftools - publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process.replace(':', '/')}", pattern: "{reject,liftover}.vcf.gz{,.tbi}", mode: "copy", enabled: params.save_intermediate_files, @@ -50,7 +50,7 @@ process extract_VCF_features_StableLift { container params.docker_image_stablelift containerOptions "-v ${moduleDir}:${moduleDir}" - publishDir path: "${params.output_dir_base}/StableLift-${params.stablelift_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/StableLift-${params.stablelift_version}/intermediate/${task.process.replace(':', '/')}", pattern: "features.Rds", mode: "copy", enabled: params.save_intermediate_files, diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index 
f09996a..60e37fa 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -2,7 +2,7 @@ process run_sv_liftover{ container params.docker_image_stablelift - publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process.replace(':', '/')}", pattern: "liftover.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, @@ -36,7 +36,7 @@ process run_sv_liftover{ process run_intersect_gnomad { container params.docker_image_stablelift - publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process.replace(':', '/')}", pattern: "annotations.Rds", mode: "copy", enabled: params.save_intermediate_files, diff --git a/module/utils.nf b/module/utils.nf index 0967698..948d8f3 100644 --- a/module/utils.nf +++ b/module/utils.nf @@ -2,7 +2,7 @@ process compress_and_index_HTSlib { container params.docker_image_samtools - publishDir path: "${params.output_dir_base}/SAMtools-${params.samtools_version}/intermediate/${task.process}", + publishDir path: "${params.output_dir_base}/SAMtools-${params.samtools_version}/intermediate/${task.process.replace(':', '/')}", pattern: "output.tsv.gz{,.tbi}", mode: "copy", enabled: params.save_intermediate_files, From 162f22efe6f1610347f0f6b79070bd84fa66b8b5 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 16:10:27 -0700 Subject: [PATCH 19/42] Combine intermediate files --- module/snv_annotations.nf | 8 ++++---- module/snv_workflow.nf | 4 ++-- module/sv_workflow.nf | 4 ++-- module/utils.nf | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/module/snv_annotations.nf b/module/snv_annotations.nf index 9e6af47..78ea248 100644 --- a/module/snv_annotations.nf +++ b/module/snv_annotations.nf @@ 
-3,7 +3,7 @@ include { compress_and_index_HTSlib } from './utils.nf' process run_Funcotator_GATK { container params.docker_image_gatk - publishDir path: "${params.output_dir_base}/GATK-${params.gatk_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, @@ -39,7 +39,7 @@ process run_Funcotator_GATK { process annotate_RepeatMasker_BCFtools { container params.docker_image_bcftools - publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, @@ -72,7 +72,7 @@ process annotate_RepeatMasker_BCFtools { process extract_TrinucleotideContext_BEDTools { container params.docker_image_bedtools - publishDir path: "${params.output_dir_base}/BEDtools-${params.bedtools_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "output.{bed,tsv}", mode: "copy", enabled: params.save_intermediate_files, @@ -116,7 +116,7 @@ process extract_TrinucleotideContext_BEDTools { process annotate_trinucleotide_BCFtools { container params.docker_image_bcftools - publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "output.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index 4a445aa..f04de2d 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -3,7 +3,7 @@ include { workflow_apply_snv_annotations } from 
'./snv_annotations.nf' process run_liftover_BCFtools { container params.docker_image_bcftools - publishDir path: "${params.output_dir_base}/BCFtools-${params.bcftools_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "{reject,liftover}.vcf.gz{,.tbi}", mode: "copy", enabled: params.save_intermediate_files, @@ -50,7 +50,7 @@ process extract_VCF_features_StableLift { container params.docker_image_stablelift containerOptions "-v ${moduleDir}:${moduleDir}" - publishDir path: "${params.output_dir_base}/StableLift-${params.stablelift_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "features.Rds", mode: "copy", enabled: params.save_intermediate_files, diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index 60e37fa..73cc690 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -2,7 +2,7 @@ process run_sv_liftover{ container params.docker_image_stablelift - publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "liftover.vcf.gz", mode: "copy", enabled: params.save_intermediate_files, @@ -36,7 +36,7 @@ process run_sv_liftover{ process run_intersect_gnomad { container params.docker_image_stablelift - publishDir path: "${params.output_dir_base}/StableLift-${params.stable_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "annotations.Rds", mode: "copy", enabled: params.save_intermediate_files, diff --git a/module/utils.nf b/module/utils.nf index 948d8f3..1c8efdf 100644 --- a/module/utils.nf +++ b/module/utils.nf @@ -2,7 +2,7 @@ process 
compress_and_index_HTSlib { container params.docker_image_samtools - publishDir path: "${params.output_dir_base}/SAMtools-${params.samtools_version}/intermediate/${task.process.replace(':', '/')}", + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", pattern: "output.tsv.gz{,.tbi}", mode: "copy", enabled: params.save_intermediate_files, From 92a73a5ae781012c49fd1e6206a0b05debeed0f4 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 16:22:50 -0700 Subject: [PATCH 20/42] Rename NFTest case as SNV-specific --- nftest.yml | 4 ++-- test/{nftest.config => common.config} | 9 --------- test/snv.config | 13 +++++++++++++ 3 files changed, 15 insertions(+), 11 deletions(-) rename test/{nftest.config => common.config} (79%) create mode 100644 test/snv.config diff --git a/nftest.yml b/nftest.yml index 32644f5..d9253bc 100644 --- a/nftest.yml +++ b/nftest.yml @@ -3,11 +3,11 @@ global: temp_dir: test/work remove_temp: true clean_logs: true - nf_config: test/nftest.config cases: - - name: Example test + - name: SNV nf_script: ./main.nf + nf_config: test/snv.config params_file: test/nftest.yaml skip: false verbose: true diff --git a/test/nftest.config b/test/common.config similarity index 79% rename from test/nftest.config rename to test/common.config index 30ddb6e..b9ddd9a 100644 --- a/test/nftest.config +++ b/test/common.config @@ -1,10 +1,4 @@ -includeConfig "${projectDir}/config/default.config" -includeConfig "${projectDir}/config/methods.config" -includeConfig "${projectDir}/nextflow.config" - params { - // Choices: ["Mutect2", "HaplotypeCaller"] - variant_caller = "Mutect2" save_intermediate_files = true ucla_cds = false @@ -32,6 +26,3 @@ params { repeat_bed = "/hot/ref/database/RepeatMasker-3.0.1/processed/GRCh38/GRCh38_RepeatMasker_intervals.bed" } - -// Setup the pipeline config. DO NOT REMOVE THIS LINE! 
-methods.setup() diff --git a/test/snv.config b/test/snv.config new file mode 100644 index 0000000..1834eeb --- /dev/null +++ b/test/snv.config @@ -0,0 +1,13 @@ +includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + +includeConfig "${projectDir}/test/common.config" + +params { + // Choices: ["Mutect2", "HaplotypeCaller"] + variant_caller = "Mutect2" +} + +// Setup the pipeline config. DO NOT REMOVE THIS LINE! +methods.setup() From 68c7085c8a0fb7ca9a05dedec94b3ffbb68f2004 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 24 Jul 2024 16:26:50 -0700 Subject: [PATCH 21/42] Add SV-specific NFTest, bugfix for parameters --- main.nf | 6 +++--- nftest.yml | 7 +++++++ test/sv.config | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 test/sv.config diff --git a/main.nf b/main.nf index f01cfb8..2de36e8 100644 --- a/main.nf +++ b/main.nf @@ -149,9 +149,9 @@ workflow { // Take the SV branch workflow_extract_sv_annotations( validated_vcf_tuple, - Channel.value(header_contigs), - Channel.value(gnomad_rds), - Channel.value(chain_file), + Channel.value(params.header_contigs), + Channel.value(params.gnomad_rds), + Channel.value(params.chain_file), Channel.value(params.variant_caller) ) diff --git a/nftest.yml b/nftest.yml index d9253bc..5c13015 100644 --- a/nftest.yml +++ b/nftest.yml @@ -11,3 +11,10 @@ cases: params_file: test/nftest.yaml skip: false verbose: true + + - name: SV + nf_script: ./main.nf + nf_config: test/sv.config + params_file: test/nftest.yaml + skip: false + verbose: true diff --git a/test/sv.config b/test/sv.config new file mode 100644 index 0000000..201c0cd --- /dev/null +++ b/test/sv.config @@ -0,0 +1,19 @@ +includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + +includeConfig "${projectDir}/test/common.config" + 
+params { + // Choices: ["Mutect2", "HaplotypeCaller"] + variant_caller = "Delly2" + + // FIXME Should this be bundled? + header_contigs = "/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/GRCh38-vcf-header-contigs.txt" + + // FIXME Should this be bundled? + gnomad_rds = "/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/data/gnomad.v4.0.sv.Rds" +} + +// Setup the pipeline config. DO NOT REMOVE THIS LINE! +methods.setup() From 4ae07bf8e5d1a0966e705a9ef2254fdb3916bd77 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 25 Jul 2024 14:27:29 -0700 Subject: [PATCH 22/42] Bundle rtracklayer into Docker --- Dockerfile | 26 +++++++++++++++++++++-- docker/install-stablelift.R | 41 +++++++++++++++++-------------------- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3b95730..edd36ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,27 @@ -ARG R_VERSION=4.3.1 +ARG R_VERSION="4.3.1" FROM rocker/r-ver:${R_VERSION} AS build +ARG LIBBZ2_VERSION="1.0.8-5build1" +ARG LIBCURL_VERSION="7.81.0-1ubuntu1.16" +ARG LIBLZMA_VERSION="5.2.5-2ubuntu1" +ARG LIBXML2_VERSION="2.9.13+dfsg-1ubuntu0.4" +ARG ZLIB_VERSION="1:1.2.11.dfsg-2ubuntu9.2" + +# Install build-time dependencies +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + libbz2-dev=${LIBBZ2_VERSION} \ + libcurl4-openssl-dev=${LIBCURL_VERSION} \ + liblzma-dev=${LIBLZMA_VERSION} \ + libxml2-dev=${LIBXML2_VERSION} \ + zlib1g-dev=${ZLIB_VERSION} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ARG BIOC_VERSION="3.18" +ENV BIOC_VERSION=${BIOC_VERSION} + COPY docker/install-stablelift.R /tmp RUN Rscript /tmp/install-stablelift.R @@ -9,7 +29,9 @@ FROM rocker/r-ver:${R_VERSION} # Overwrite the site library with just the desired packages. By default rocker # only bundles docopt and littler in that directory. 
-COPY --from=build /tmp/userlib /usr/local/lib/R/site-library +COPY --from=build \ + /tmp/stablelift/renv/library/R-4.3/aarch64-unknown-linux-gnu \ + /usr/local/lib/R/site-library # Install python (required for argparse). The version is not important, but # let's pin it for stability. diff --git a/docker/install-stablelift.R b/docker/install-stablelift.R index b20a08d..a7db7df 100644 --- a/docker/install-stablelift.R +++ b/docker/install-stablelift.R @@ -1,26 +1,23 @@ -# Install the remotes package to the library -install.packages('remotes', lib = .Library) +install.packages('renv', lib = .Library) -# Make a temporary directory to hold all of the installed packages -localdir <- '/tmp/userlib' -dir.create(localdir) +options( + renv.settings.bioconductor.version = Sys.getenv("BIOC_VERSION") +) -dependencies <- c( - 'ROCR' = '1.0-11', - 'argparse' = '2.2.2', - 'caret' = '6.0-94', - 'data.table' = '1.14.8', - 'doParallel' = '1.0.17', - 'foreach' = '1.5.2', - 'ranger' = '0.15.1', - 'vcfR' = '1.14.0' +renv::init( + project = "/tmp/stablelift", + bare = TRUE, + bioconductor = Sys.getenv("BIOC_VERSION") ) -# Unfortunately, this will install the dependencies multiple times -for (name in names(dependencies)) { - remotes::install_version( - name, - unname(dependencies[name]), - lib = localdir - ) -} +renv::install(c( + 'ROCR@1.0-11', + 'argparse@2.2.2', + 'caret@6.0-94', + 'data.table@1.14.8', + 'doParallel@1.0.17', + 'foreach@1.5.2', + 'ranger@0.15.1', + 'vcfR@1.14.0', + 'bioc::rtracklayer@1.62.0' +)) From 8b87584c8dbc54301d618dc380bfc97d61476f62 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 25 Jul 2024 14:30:16 -0700 Subject: [PATCH 23/42] Group arguments in Dockerfile --- Dockerfile | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index edd36ab..f25846a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,20 @@ ARG R_VERSION="4.3.1" -FROM rocker/r-ver:${R_VERSION} AS build - ARG 
LIBBZ2_VERSION="1.0.8-5build1" ARG LIBCURL_VERSION="7.81.0-1ubuntu1.16" ARG LIBLZMA_VERSION="5.2.5-2ubuntu1" ARG LIBXML2_VERSION="2.9.13+dfsg-1ubuntu0.4" +ARG PYTHON_VERSION="3.10.6-1~22.04" ARG ZLIB_VERSION="1:1.2.11.dfsg-2ubuntu9.2" +FROM rocker/r-ver:${R_VERSION} AS build + +ARG LIBBZ2_VERSION +ARG LIBCURL_VERSION +ARG LIBLZMA_VERSION +ARG LIBXML2_VERSION +ARG ZLIB_VERSION + # Install build-time dependencies RUN apt-get update \ && apt-get install -y --no-install-recommends \ @@ -35,7 +42,7 @@ COPY --from=build \ # Install python (required for argparse). The version is not important, but # let's pin it for stability. -ARG PYTHON_VERSION=3.10.6-1~22.04 +ARG PYTHON_VERSION RUN apt-get update \ && apt-get install -y --no-install-recommends \ From 5a2745a4f0bc4fee36d044374fb5c868d9b16a10 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 25 Jul 2024 14:34:15 -0700 Subject: [PATCH 24/42] Small bugfixes --- module/sv_workflow.nf | 4 ++-- nftest.yml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index 73cc690..9e78860 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -20,7 +20,7 @@ process run_sv_liftover{ script: """ - Rscript "${moduleDir}/scripts/liftover-Delly2-vcf.R \ + Rscript "${moduleDir}/scripts/liftover-Delly2-vcf.R" \ --input-vcf "${vcf}" \ --header-contigs "${header_contigs}" \ --chain-file "${chain_file}" \ @@ -52,7 +52,7 @@ process run_intersect_gnomad { script: """ - Rscript ${moduleDir}/scripts/publish/extract-vcf-features-SV.R \ + Rscript ${moduleDir}/scripts/publish/extract-vcf-features-SV.R" \ --variant-caller "${variant_caller}" \ --input-vcf "${vcf}" \ --output-rds "annotations.Rds" \ diff --git a/nftest.yml b/nftest.yml index 5c13015..74dc4e7 100644 --- a/nftest.yml +++ b/nftest.yml @@ -3,6 +3,7 @@ global: temp_dir: test/work remove_temp: true clean_logs: true + nf_config: test/common.config cases: - name: SNV From 7a485233491cecfa7d2e7051dd833880a01f9612 
Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 25 Jul 2024 15:11:19 -0700 Subject: [PATCH 25/42] Pre-copy folder to standard path --- Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index f25846a..3b2b56f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,14 +30,18 @@ ARG BIOC_VERSION="3.18" ENV BIOC_VERSION=${BIOC_VERSION} COPY docker/install-stablelift.R /tmp + RUN Rscript /tmp/install-stablelift.R +RUN cp -r \ + "/tmp/stablelift/renv/library/R-4.3/*" \ + "/tmp/library-to-copy" FROM rocker/r-ver:${R_VERSION} # Overwrite the site library with just the desired packages. By default rocker # only bundles docopt and littler in that directory. COPY --from=build \ - /tmp/stablelift/renv/library/R-4.3/aarch64-unknown-linux-gnu \ + /tmp/library-to-copy \ /usr/local/lib/R/site-library # Install python (required for argparse). The version is not important, but From 2c4953f11a070cdd49d0b6eb0bd4240103c59672 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 25 Jul 2024 15:29:45 -0700 Subject: [PATCH 26/42] Remove quotes --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3b2b56f..5f01833 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,8 +33,8 @@ COPY docker/install-stablelift.R /tmp RUN Rscript /tmp/install-stablelift.R RUN cp -r \ - "/tmp/stablelift/renv/library/R-4.3/*" \ - "/tmp/library-to-copy" + /tmp/stablelift/renv/library/R-4.3/* \ + /tmp/library-to-copy FROM rocker/r-ver:${R_VERSION} From f03a5af9330edb766ff317bc39014e5a3b3ff0ae Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 25 Jul 2024 17:14:20 -0700 Subject: [PATCH 27/42] Try a different mechanism to get library paths --- Dockerfile | 24 ++++++++++++++++-------- docker/install-stablelift.R | 1 - 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5f01833..d37ed86 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ ARG 
LIBLZMA_VERSION="5.2.5-2ubuntu1" ARG LIBXML2_VERSION="2.9.13+dfsg-1ubuntu0.4" ARG PYTHON_VERSION="3.10.6-1~22.04" ARG ZLIB_VERSION="1:1.2.11.dfsg-2ubuntu9.2" +ARG RLIBDIR="/usr/local/stablelift-R" FROM rocker/r-ver:${R_VERSION} AS build @@ -29,20 +30,27 @@ RUN apt-get update \ ARG BIOC_VERSION="3.18" ENV BIOC_VERSION=${BIOC_VERSION} -COPY docker/install-stablelift.R /tmp +ARG RLIBDIR +ENV RENV_PATHS_CACHE ${RLIBDIR}/.cache + +RUN mkdir -p ${RENV_PATHS_CACHE} + +WORKDIR ${RLIBDIR} +COPY docker/install-stablelift.R /tmp RUN Rscript /tmp/install-stablelift.R -RUN cp -r \ - /tmp/stablelift/renv/library/R-4.3/* \ - /tmp/library-to-copy + +# renv prints to stdout, so we need to change directories +WORKDIR / +RUN echo ".libPaths( c( .libPaths(), \"/usr/local/stablelift-R/renv/library/R-4.3/$(Rscript -e "cat(unname(unlist(R.version['platform'])))")\" ) )" >> /usr/local/lib/R/etc/Rprofile.site FROM rocker/r-ver:${R_VERSION} -# Overwrite the site library with just the desired packages. By default rocker -# only bundles docopt and littler in that directory. +ARG RLIBDIR +COPY --from=build ${RLIBDIR} ${RLIBDIR} COPY --from=build \ - /tmp/library-to-copy \ - /usr/local/lib/R/site-library + /usr/local/lib/R/etc/Rprofile.site \ + /usr/local/lib/R/etc/Rprofile.site # Install python (required for argparse). The version is not important, but # let's pin it for stability. 
diff --git a/docker/install-stablelift.R b/docker/install-stablelift.R index a7db7df..f5543bb 100644 --- a/docker/install-stablelift.R +++ b/docker/install-stablelift.R @@ -5,7 +5,6 @@ options( ) renv::init( - project = "/tmp/stablelift", bare = TRUE, bioconductor = Sys.getenv("BIOC_VERSION") ) From 98750e93b9dccbb7c12ec4bfb198f53fc77fdff1 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 25 Jul 2024 15:46:43 -0700 Subject: [PATCH 28/42] Use branch version of image --- config/default.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/default.config b/config/default.config index b4fb07c..05eb6f7 100644 --- a/config/default.config +++ b/config/default.config @@ -22,7 +22,7 @@ params { gatk_version = '4.2.4.1' pipeval_version = '5.0.0-rc.3' samtools_version = '1.20' - stablelift_version = 'dev' // FIXME + stablelift_version = 'branch-nwiltsie-regroup-modules' // FIXME docker_image_bcftools = "${-> params.docker_container_registry}/bcftools-score:${params.bcftools_version}" docker_image_bedtools = "${-> params.docker_container_registry}/bedtools:${params.bedtools_version}" From 642aa56907d176e114adb3842aa9695a9bc1df02 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 26 Jul 2024 09:35:22 -0700 Subject: [PATCH 29/42] Bugfixes, test cleanup for SV case --- module/sv_workflow.nf | 2 +- nftest.yml | 4 ++-- test/common.config | 2 -- test/snv.config | 2 ++ test/{nftest.yaml => snv.yaml} | 0 test/sv.config | 2 ++ test/sv.yaml | 4 ++++ 7 files changed, 11 insertions(+), 5 deletions(-) rename test/{nftest.yaml => snv.yaml} (100%) create mode 100644 test/sv.yaml diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index 9e78860..591a2d4 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -52,7 +52,7 @@ process run_intersect_gnomad { script: """ - Rscript ${moduleDir}/scripts/publish/extract-vcf-features-SV.R" \ + Rscript "${moduleDir}/scripts/extract-vcf-features-SV.R" \ --variant-caller "${variant_caller}" \ 
--input-vcf "${vcf}" \ --output-rds "annotations.Rds" \ diff --git a/nftest.yml b/nftest.yml index 74dc4e7..6cbca4f 100644 --- a/nftest.yml +++ b/nftest.yml @@ -9,13 +9,13 @@ cases: - name: SNV nf_script: ./main.nf nf_config: test/snv.config - params_file: test/nftest.yaml + params_file: test/snv.yaml skip: false verbose: true - name: SV nf_script: ./main.nf nf_config: test/sv.config - params_file: test/nftest.yaml + params_file: test/sv.yaml skip: false verbose: true diff --git a/test/common.config b/test/common.config index b9ddd9a..e3d037e 100644 --- a/test/common.config +++ b/test/common.config @@ -3,8 +3,6 @@ params { ucla_cds = false - rf_model = "/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/sSNV/stableLift/train_CPCG-40QC_Mutect2/RF-train_Mutect2_ntree2000_nodesize5_classratio0.Rds" - // Reference files funcotator_data { data_source = "/hot/ref/tool-specific-input/Funcotator/somatic/funcotator_dataSources.v1.7.20200521s" diff --git a/test/snv.config b/test/snv.config index 1834eeb..a87fc52 100644 --- a/test/snv.config +++ b/test/snv.config @@ -7,6 +7,8 @@ includeConfig "${projectDir}/test/common.config" params { // Choices: ["Mutect2", "HaplotypeCaller"] variant_caller = "Mutect2" + + rf_model = "/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/sSNV/stableLift/train_CPCG-40QC_Mutect2/RF-train_Mutect2_ntree2000_nodesize5_classratio0.Rds" } // Setup the pipeline config. DO NOT REMOVE THIS LINE! diff --git a/test/nftest.yaml b/test/snv.yaml similarity index 100% rename from test/nftest.yaml rename to test/snv.yaml diff --git a/test/sv.config b/test/sv.config index 201c0cd..ded1405 100644 --- a/test/sv.config +++ b/test/sv.config @@ -8,6 +8,8 @@ params { // Choices: ["Mutect2", "HaplotypeCaller"] variant_caller = "Delly2" + rf_model = "/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/gSV/stableLift/train_CPCG-40QC_Delly2/RF-train_Delly2_ntree2000_nodesize20_classratio0.Rds" + // FIXME Should this be bundled? 
header_contigs = "/hot/code/nkwang/GitHub/uclahs-cds/project-method-AlgorithmEvaluation-BNCH-000142-GRCh37v38/report/manuscript/publish/GRCh38-vcf-header-contigs.txt" diff --git a/test/sv.yaml b/test/sv.yaml new file mode 100644 index 0000000..75dedd9 --- /dev/null +++ b/test/sv.yaml @@ -0,0 +1,4 @@ +--- +sample_id: ExampleID +input: + vcf: /hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/validation/TCGA-SARC_WGS/GRCh37/Delly2/TCGA-SARC_WGS_Delly2_regenotype-gSV_bcftools-merge_filter-germline.vcf.gz From 6df89f06d99dd29b62b58baa3b9a17d9176650d5 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 29 Jul 2024 08:50:54 -0700 Subject: [PATCH 30/42] Add mermaid flow diagram --- README.md | 4 +-- docs/pipeline.mmd | 77 +++++++++++++++++++++++++++++++++++++++++++ docs/pipeline.mmd.svg | 1 + 3 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 docs/pipeline.mmd create mode 100644 docs/pipeline.mmd.svg diff --git a/README.md b/README.md index 6d1ed5c..263196b 100644 --- a/README.md +++ b/README.md @@ -39,9 +39,7 @@ If you are using the UCLA Azure cluster, please use the [submission script](http ## Flow Diagram -A directed acyclic graph of your pipeline. The [PlantUML](https://plantuml.com/) code defining this diagram is version-controlled in the [docs/](./docs/) folder, and a [GitHub Action](https://github.com/uclahs-cds/tool-PlantUML-action) automatically regenerates the SVG image when that file is changed. 
- -![Pipeline Graph](./docs/pipeline-flow.svg) +![Pipeline Graph](./docs/pipeline.mmd.svg) --- diff --git a/docs/pipeline.mmd b/docs/pipeline.mmd new file mode 100644 index 0000000..1e4fb36 --- /dev/null +++ b/docs/pipeline.mmd @@ -0,0 +1,77 @@ +%%{init: {"flowchart": {"htmlLabels": false}} }%% + +flowchart TD + + classDef input fill:#ffffb3 + classDef output fill:#b3de69 + classDef gatk fill:#bebada + classDef bcftools fill:#fdb462 + classDef R fill:#8dd3c7 + classDef linux fill:#fb8072 + + subgraph legend ["`**Legend**`"] + direction RL + subgraph nodes ["`**Nodes**`"] + input[["Input File"]]:::input + input_node(["Parameterized Input"]):::input + output[["Output file"]]:::output + end + + subgraph processes ["`**Processes**`"] + gatk_docker[GATK]:::gatk + bcftools_docker[bcftools]:::bcftools + r_docker[Rscript]:::R + linux_docker[Generic Linux]:::linux + end + end + + legend + ~~~ input_vcf[["Input VCF"]]:::input + --> pipeval:::linux + --> sv_vs_snv{{Variant Caller?}} + + sv_vs_snv ------> r_liftover + header_contigs .-> r_liftover + chain_file2 ..-> r_liftover + gnomad_rds .-> r_extract_sv + + subgraph SV ["`**Delly2**`"] + %% Other input files + header_contigs([header_contigs]):::input + chain_file2([chain_file]):::input + gnomad_rds([gnomad_rds]):::input + + r_liftover[liftover-Delly2-vcf.R]:::R + ---> r_extract_sv[extract-VCF-features-SV.R]:::R + + end + + chain_file .-> bcftools_liftover + sv_vs_snv --> bcftools_liftover + + subgraph SNV ["`**Mutect2, HaplotypeCaller, Strelka2, Muse2, SomaticSniper**`"] + funcotator_sources([funcotator_sources]):::input + chain_file([chain_file]):::input + repeat_bed([repeat_bed]):::input + + bcftools_liftover[bcftools +liftover]:::bcftools + ---> gatk_func[gatk Funcotator]:::gatk + --> bcftools_annotate["`bcftools annotate*RepeatMasker*`"]:::bcftools + --> bcftools_annotate2["`bcftools annotate*Trinucleotide*`"]:::bcftools + --> r_extract_snv[extract-VCF-features.R]:::R + end + + funcotator_sources .-> gatk_func + 
repeat_bed .-> bcftools_annotate + + joinpaths{ } + r_extract_snv --> joinpaths + r_extract_sv --> joinpaths + joinpaths ---> r_predict_stability + + subgraph Predict Stability ["`        **Predict Stability**`"] + r_predict_stability[predict-liftover-stability.R]:::R + --> bcftools_annotate3["`bcftools annotate*Stability*`"]:::bcftools + + rf_model([rf_model]):::input .-> r_predict_stability + end diff --git a/docs/pipeline.mmd.svg b/docs/pipeline.mmd.svg new file mode 100644 index 0000000..d435482 --- /dev/null +++ b/docs/pipeline.mmd.svg @@ -0,0 +1 @@ +         Predict StabilityMutect2,HaplotypeCaller,Strelka2, Muse2,SomaticSniperDelly2bcftools annotateStabilitypredict-liftover-stability.Rrf_modelfuncotator_sourceschain_filerepeat_bedextract-VCF-features.Rbcftools annotateTrinucleotidebcftools annotateRepeatMaskergatk Funcotatorbcftools +liftoverheader_contigschain_filegnomad_rdsextract-VCF-features-SV.Rliftover-Delly2-vcf.RLegendNodesInput FileParameterized InputOutput fileProcessesGATKbcftoolsRscriptGeneric LinuxInput VCFpipevalVariant Caller? 
\ No newline at end of file From 971a7135d94e0cd7a191001b716014c169c7bf64 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 29 Jul 2024 08:55:01 -0700 Subject: [PATCH 31/42] Add output at end of pipeline --- docs/pipeline.mmd | 2 ++ docs/pipeline.mmd.svg | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/pipeline.mmd b/docs/pipeline.mmd index 1e4fb36..76fe200 100644 --- a/docs/pipeline.mmd +++ b/docs/pipeline.mmd @@ -75,3 +75,5 @@ flowchart TD rf_model([rf_model]):::input .-> r_predict_stability end + + bcftools_annotate3 --> output_vcfs[["Output VCFs"]]:::output diff --git a/docs/pipeline.mmd.svg b/docs/pipeline.mmd.svg index d435482..dfbf011 100644 --- a/docs/pipeline.mmd.svg +++ b/docs/pipeline.mmd.svg @@ -1 +1 @@ -         Predict StabilityMutect2,HaplotypeCaller,Strelka2, Muse2,SomaticSniperDelly2bcftools annotateStabilitypredict-liftover-stability.Rrf_modelfuncotator_sourceschain_filerepeat_bedextract-VCF-features.Rbcftools annotateTrinucleotidebcftools annotateRepeatMaskergatk Funcotatorbcftools +liftoverheader_contigschain_filegnomad_rdsextract-VCF-features-SV.Rliftover-Delly2-vcf.RLegendNodesInput FileParameterized InputOutput fileProcessesGATKbcftoolsRscriptGeneric LinuxInput VCFpipevalVariant Caller? 
\ No newline at end of file +         Predict StabilityMutect2,HaplotypeCaller,Strelka2, Muse2,SomaticSniperDelly2bcftools annotateStabilitypredict-liftover-stability.Rrf_modelfuncotator_sourceschain_filerepeat_bedextract-VCF-features.Rbcftools annotateTrinucleotidebcftools annotateRepeatMaskergatk Funcotatorbcftools +liftoverheader_contigschain_filegnomad_rdsextract-VCF-features-SV.Rliftover-Delly2-vcf.RLegendNodesInput FileParameterized InputOutput fileProcessesGATKbcftoolsRscriptGeneric LinuxInput VCFpipevalVariant Caller?Output VCFs \ No newline at end of file From 9291ff0cf5c37df9df92bc50a85545fc25610673 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 26 Jul 2024 12:58:25 -0700 Subject: [PATCH 32/42] Pull in latest changes to predict-liftover-stability.R --- module/scripts/predict-liftover-stability.R | 38 +++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/module/scripts/predict-liftover-stability.R b/module/scripts/predict-liftover-stability.R index 12892b9..4962f4f 100644 --- a/module/scripts/predict-liftover-stability.R +++ b/module/scripts/predict-liftover-stability.R @@ -33,6 +33,38 @@ for (arg in names(args)) { assign(gsub('_', '.', arg), args[[arg]]); } +#################################################################################################### +# Functions +#################################################################################################### +vcf.info.to.dt <- function(vcf.info) { + # Split each string by semicolon and convert to a list of key-value pairs + vcf.info <- strsplit(vcf.info, ';'); + vcf.info <- lapply(vcf.info, function(x) { + x <- strsplit(x, '='); + as.list(stats::setNames(sapply(x, `[`, 2), sapply(x, `[`, 1))); + }) + + # Combine the list of key-value pairs into a data table + rbindlist(vcf.info, fill = TRUE); + } + +# Sort datatable by chr then position +sort.genomic.dt <- function(x, chr = 'CHROM', pos = 'POS') { + setDT(x); + x[, eval(chr) := gsub('chr', '', 
get(chr))]; + x[, eval(chr) := gsub('X', '23', get(chr))]; + x[, eval(chr) := gsub('Y', '24', get(chr))]; + x[, eval(chr) := as.numeric(get(chr))]; + + setorderv(x, c(chr, pos), c(1, 1)); + + x[, eval(chr) := gsub('23', 'X', get(chr))]; + x[, eval(chr) := gsub('24', 'Y', get(chr))]; + x[, eval(chr) := paste0('chr', get(chr))]; + + return(x); + } + ################################################################################################### # Load data ################################################################################################### @@ -99,14 +131,12 @@ stability <- predict(rf.model, data = features.dt); performance.f <- performance(rf.model$prediction, measure = 'f'); index <- which.max(unlist(performance.f@y.values)); threshold <- unlist(performance.f@x.values)[index]; -# f.score <- unlist(performance.f@y.values)[index]; performance <- performance(rf.model$prediction, 'sens', 'spec'); sensitivity <- unlist(performance@y.values)[index]; specificity <- unlist(performance@x.values)[index]; -# cat(sprintf('Max F1-score = %.3f\n', f.score)); # Convert to stability units threshold.stability <- 1 - threshold; cat(sprintf('Threshold = %.3f\n', threshold.stability)); @@ -126,4 +156,6 @@ annotation.dt <- data.table( STABILITY_SCORE = format(round(stability$predictions[, 1], 4), nsmall = 4), STABILITY = ifelse(stability.classification == '1', 'UNSTABLE', 'STABLE') ); -fwrite(annotation.dt, file = output.tsv, sep = '\t', col.names = TRUE); +sort.genomic.dt(annotation.dt); +fwrite(annotation.dt, file = output.tsv, sep = '\t', col.names = FALSE); + From 729a9706b1e54aeb97334ff6ba6122235e680dbb Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 26 Jul 2024 13:40:29 -0700 Subject: [PATCH 33/42] Bugfix, channel mis-match --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 2de36e8..abac9ca 100644 --- a/main.nf +++ b/main.nf @@ -155,7 +155,7 @@ workflow { Channel.value(params.variant_caller) ) - 
workflow_extract_sv_annotations.out.r_annotations.set { liftover_vcf } + workflow_extract_sv_annotations.out.liftover_vcf.set { liftover_vcf } workflow_extract_sv_annotations.out.r_annotations.set { r_annotations } } else { @@ -168,7 +168,7 @@ workflow { Channel.value(params.variant_caller) ) - workflow_extract_snv_annotations.out.r_annotations.set { liftover_vcf } + workflow_extract_snv_annotations.out.liftover_vcf.set { liftover_vcf } workflow_extract_snv_annotations.out.r_annotations.set { r_annotations } } From 5977e7e7a224dd8165557389a25cc68b74bc0b71 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 29 Jul 2024 11:47:32 -0700 Subject: [PATCH 34/42] Update CHANGELOG --- CHANGELOG.md | 57 ++++------------------------------------------ config/schema.yaml | 11 +++++++++ 2 files changed, 15 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb5160a..2486627 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ # Changelog -All notable changes to the pipeline-name pipeline. +All notable changes to the StableLift pipeline. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
@@ -8,58 +8,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm --- ## [Unreleased] -### Added -- Add `sample_id` extraction from BAM -- Add template input YAMLs -- Add pipeline-Nexflow-config as submodule and redirect set_resources_allocation -- Add pipeline-Nextflow-module as submodule -- Additional out of memory exit code -- Pipeline release action -- Template for NFTest testing results in PR template -- Enable dependabot -- Add example PlantUML image to README -- Add workflow to build documentation -- Add workflows to run Nextflow configuration tests - -### Changed -- Switch resource limit checks to external scripts -- Update links in on-prem Confluence to point to cloud-based Confluence -- Fix `CODEOWNERS` file -- Use `schema.check_path` for `workDir` validation -- Add `Discussions` and `Contributors` to the Table of Contents in `README.md` -- Update from DSL1 to DSL2 -- Standardize config structure -- Restructure repo so main script is main.nf -- Reorganize contributors and metadata -- Reorganize PR template so description is at top -- Update automatic node detection to allow for F2 detection -- Update Issue Template -- Standardize input/output/parameter structure in README -- Avoid modification of input parameter `output_dir` -- Create default docker container registry parameter for tools -- Use `methods.setup_process_afterscript()` to capture log files - ---- -## [1.0.0] - YYYY-MM-DD ### Added -- For new features. -- Added item 1. - -### Changed -- For changes in existing functionality. -- Changed item 1. - -### Deprecated -- For soon-to-be removed features. - -### Removed -- For now removed features. -- Removed item 1. - -### Fixed -- For any bug fixes. -- Fixed item 1. -### Security -- In case of vulnerabilities. 
+- Add workflow for SNV callers (Mutect2, HaplotypeCaller, Strelka2, Muse2, SomaticSniper) +- Add workflow for SV caller (Delly2) +- Add pipeline diagram diff --git a/config/schema.yaml b/config/schema.yaml index 7d2645b..76b8a2f 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -3,6 +3,17 @@ sample_id: type: 'String' required: true help: 'sample id supplied from input yaml' +variant_caller: + type: 'String' + required: true + help: 'Tool used to call structural or somatic variants' + choices: + - Mutect2 + - HaplotypeCaller + - Strelka2 + - Muse2 + - SomaticSniper + - Delly2 save_intermediate_files: type: 'Bool' required: true From 8e10b3bb52c24a6678a4cb56b1cf5dec1d41ac6c Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 29 Jul 2024 12:26:08 -0700 Subject: [PATCH 35/42] Fix lints --- docker/install-stablelift.R | 4 ++-- module/scripts/liftover-Delly2-vcf.R | 2 +- module/scripts/predict-liftover-stability.R | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/install-stablelift.R b/docker/install-stablelift.R index f5543bb..ef50338 100644 --- a/docker/install-stablelift.R +++ b/docker/install-stablelift.R @@ -1,12 +1,12 @@ install.packages('renv', lib = .Library) options( - renv.settings.bioconductor.version = Sys.getenv("BIOC_VERSION") + renv.settings.bioconductor.version = Sys.getenv('BIOC_VERSION') ) renv::init( bare = TRUE, - bioconductor = Sys.getenv("BIOC_VERSION") + bioconductor = Sys.getenv('BIOC_VERSION') ) renv::install(c( diff --git a/module/scripts/liftover-Delly2-vcf.R b/module/scripts/liftover-Delly2-vcf.R index 827a3b1..d473091 100644 --- a/module/scripts/liftover-Delly2-vcf.R +++ b/module/scripts/liftover-Delly2-vcf.R @@ -151,7 +151,7 @@ for (i in seq_len(nrow(fix.lifted))) { output.vcf <- input.vcf; output.vcf@fix <- as.matrix(fix.lifted); output.vcf@gt <- as.matrix(gt.dt); -output.vcf@meta <- output.vcf@meta[!grepl("^##(contig|reference)", output.vcf@meta)]; +output.vcf@meta <- 
output.vcf@meta[!grepl('^##(contig|reference)', output.vcf@meta)]; output.vcf@meta <- c(output.vcf@meta, header.contigs); write.vcf(output.vcf, output); diff --git a/module/scripts/predict-liftover-stability.R b/module/scripts/predict-liftover-stability.R index 4962f4f..7f3f90a 100644 --- a/module/scripts/predict-liftover-stability.R +++ b/module/scripts/predict-liftover-stability.R @@ -158,4 +158,3 @@ annotation.dt <- data.table( ); sort.genomic.dt(annotation.dt); fwrite(annotation.dt, file = output.tsv, sep = '\t', col.names = FALSE); - From 22ccbc23aecf1473e350e4d94c4fd5f29e41d9ae Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 29 Jul 2024 14:43:50 -0700 Subject: [PATCH 36/42] Sort VCF after liftover in SV branch --- CHANGELOG.md | 4 ++++ module/sv_workflow.nf | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2486627..bc93c82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,3 +14,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Add workflow for SNV callers (Mutect2, HaplotypeCaller, Strelka2, Muse2, SomaticSniper) - Add workflow for SV caller (Delly2) - Add pipeline diagram + +### Changed + +- Sort VCF after liftover in SV branch diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index 591a2d4..c510a87 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -33,6 +33,35 @@ process run_sv_liftover{ """ } +process run_sort_BCFtools { + container params.docker_image_bcftools + + publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", + pattern: "sorted.vcf.gz", + mode: "copy", + enabled: params.save_intermediate_files, + saveAs: { "BCFtools-sorted-${sample_id}.vcf.gz" } + + input: + tuple val(sample_id), path(vcf, stageAs: 'inputs/*') + + output: + tuple val(sample_id), path('sorted.vcf.gz'), emit: sorted_vcf + + script: + """ + bcftools sort \ + --output-type z \ + 
--output sorted.vcf.gz \ + ${vcf} + """ + + stub: + """ + touch sorted.vcf.gz + """ +} + process run_intersect_gnomad { container params.docker_image_stablelift @@ -81,15 +110,18 @@ workflow workflow_extract_sv_annotations { header_contigs, chain_file ) + run_sort_BCFtools( + run_sv_liftover.out.liftover_vcf + ) // Step 2: Extract features run_intersect_gnomad( - run_sv_liftover.out.liftover_vcf, + run_sort_BCFtools.out.sorted_vcf, gnomad_rds, variant_caller ) emit: - liftover_vcf = run_sv_liftover.out.liftover_vcf + liftover_vcf = run_sort_BCFtools.out.sorted_vcf r_annotations = run_intersect_gnomad.out.r_annotations } From 4701b01c9c58c70d62a3e4e7a5ad9469b269229c Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 2 Aug 2024 10:37:22 -0700 Subject: [PATCH 37/42] Reword 'Variant Caller' to 'Variant Type' --- docs/pipeline.mmd | 6 +++--- docs/pipeline.mmd.svg | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/pipeline.mmd b/docs/pipeline.mmd index 76fe200..f0bba92 100644 --- a/docs/pipeline.mmd +++ b/docs/pipeline.mmd @@ -28,14 +28,14 @@ flowchart TD legend ~~~ input_vcf[["Input VCF"]]:::input --> pipeval:::linux - --> sv_vs_snv{{Variant Caller?}} + --> sv_vs_snv{{Variant Type?}} sv_vs_snv ------> r_liftover header_contigs .-> r_liftover chain_file2 ..-> r_liftover gnomad_rds .-> r_extract_sv - subgraph SV ["`**Delly2**`"] + subgraph SV ["`**SV**`"] %% Other input files header_contigs([header_contigs]):::input chain_file2([chain_file]):::input @@ -49,7 +49,7 @@ flowchart TD chain_file .-> bcftools_liftover sv_vs_snv --> bcftools_liftover - subgraph SNV ["`**Mutect2, HaplotypeCaller, Strelka2, Muse2, SomaticSniper**`"] + subgraph SNV ["`**SNV**`"] funcotator_sources([funcotator_sources]):::input chain_file([chain_file]):::input repeat_bed([repeat_bed]):::input diff --git a/docs/pipeline.mmd.svg b/docs/pipeline.mmd.svg index dfbf011..714aef7 100644 --- a/docs/pipeline.mmd.svg +++ b/docs/pipeline.mmd.svg @@ -1 +1 @@ -         Predict 
StabilityMutect2,HaplotypeCaller,Strelka2, Muse2,SomaticSniperDelly2bcftools annotateStabilitypredict-liftover-stability.Rrf_modelfuncotator_sourceschain_filerepeat_bedextract-VCF-features.Rbcftools annotateTrinucleotidebcftools annotateRepeatMaskergatk Funcotatorbcftools +liftoverheader_contigschain_filegnomad_rdsextract-VCF-features-SV.Rliftover-Delly2-vcf.RLegendNodesInput FileParameterized InputOutput fileProcessesGATKbcftoolsRscriptGeneric LinuxInput VCFpipevalVariant Caller?Output VCFs \ No newline at end of file +         Predict StabilitySNVSVbcftools annotateStabilitypredict-liftover-stability.Rrf_modelfuncotator_sourceschain_filerepeat_bedextract-VCF-features.Rbcftools annotateTrinucleotidebcftools annotateRepeatMaskergatk Funcotatorbcftools +liftoverheader_contigschain_filegnomad_rdsextract-VCF-features-SV.Rliftover-Delly2-vcf.RLegendNodesInput FileParameterized InputOutput fileProcessesGATKbcftoolsRscriptGeneric LinuxInput VCFpipevalVariant Type?Output VCFs \ No newline at end of file From a5570f44a631cb37e180c54ef0bbb8ed0e516dea Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 2 Aug 2024 10:38:10 -0700 Subject: [PATCH 38/42] Remove unused R function --- module/scripts/predict-liftover-stability.R | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/module/scripts/predict-liftover-stability.R b/module/scripts/predict-liftover-stability.R index 7f3f90a..d8f5dd2 100644 --- a/module/scripts/predict-liftover-stability.R +++ b/module/scripts/predict-liftover-stability.R @@ -36,17 +36,6 @@ for (arg in names(args)) { #################################################################################################### # Functions #################################################################################################### -vcf.info.to.dt <- function(vcf.info) { - # Split each string by semicolon and convert to a list of key-value pairs - vcf.info <- strsplit(vcf.info, ';'); - vcf.info <- lapply(vcf.info, function(x) { - x <- strsplit(x, 
'='); - as.list(stats::setNames(sapply(x, `[`, 2), sapply(x, `[`, 1))); - }) - - # Combine the list of key-value pairs into a data table - rbindlist(vcf.info, fill = TRUE); - } # Sort datatable by chr then position sort.genomic.dt <- function(x, chr = 'CHROM', pos = 'POS') { From b12a428b093af73f3a1b15c4ab68cccaa0606b02 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 2 Aug 2024 10:38:53 -0700 Subject: [PATCH 39/42] s/run_sv_liftover/liftover_SV_StableLift/ --- module/sv_workflow.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index c510a87..3bf0532 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -1,5 +1,5 @@ -process run_sv_liftover{ +process liftover_SV_StableLift{ container params.docker_image_stablelift publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", @@ -105,13 +105,13 @@ workflow workflow_extract_sv_annotations { main: // Step 1: Liftover - run_sv_liftover( + liftover_SV_StableLift( vcf_with_sample_id, header_contigs, chain_file ) run_sort_BCFtools( - run_sv_liftover.out.liftover_vcf + liftover_SV_StableLift.out.liftover_vcf ) // Step 2: Extract features From eb8983a7dea4c0150f3aa5189a6eef02db2c0d89 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 2 Aug 2024 10:39:28 -0700 Subject: [PATCH 40/42] s/run_intersect_gnomad/annotate_gnomAD_StableLift/ --- module/sv_workflow.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/sv_workflow.nf b/module/sv_workflow.nf index 3bf0532..bdf2621 100644 --- a/module/sv_workflow.nf +++ b/module/sv_workflow.nf @@ -62,7 +62,7 @@ process run_sort_BCFtools { """ } -process run_intersect_gnomad { +process annotate_gnomAD_StableLift { container params.docker_image_stablelift publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", @@ -115,7 +115,7 @@ workflow workflow_extract_sv_annotations { ) // Step 2: Extract 
features - run_intersect_gnomad( + annotate_gnomAD_StableLift( run_sort_BCFtools.out.sorted_vcf, gnomad_rds, variant_caller @@ -123,5 +123,5 @@ workflow workflow_extract_sv_annotations { emit: liftover_vcf = run_sort_BCFtools.out.sorted_vcf - r_annotations = run_intersect_gnomad.out.r_annotations + r_annotations = annotate_gnomAD_StableLift.out.r_annotations } From 5f27a637ea0c28f98b09e9b3acddd34fe1231344 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Tue, 6 Aug 2024 16:29:50 -0700 Subject: [PATCH 41/42] Add 'StableLift-${manifest.version}' to output_dir_base --- config/methods.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/methods.config b/config/methods.config index 7cbf874..05cf337 100644 --- a/config/methods.config +++ b/config/methods.config @@ -9,7 +9,7 @@ methods { set_output_dir = { def date = new Date().format("yyyyMMdd'T'HHmmss'Z'", TimeZone.getTimeZone('UTC')) - params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample_id.replace(' ', '_')}" + params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample_id.replace(' ', '_')}/StableLift-${manifest.version}" params.log_output_dir = "${params.output_dir_base}/log-${manifest.name}-${manifest.version}-${date}" } From c9fde8c44f876bc91b6c4518bf9ffa74350a9454 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Tue, 6 Aug 2024 16:45:55 -0700 Subject: [PATCH 42/42] Use wildcards for aptitude package build versions --- Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index d37ed86..959ac76 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,11 @@ ARG R_VERSION="4.3.1" -ARG LIBBZ2_VERSION="1.0.8-5build1" -ARG LIBCURL_VERSION="7.81.0-1ubuntu1.16" -ARG LIBLZMA_VERSION="5.2.5-2ubuntu1" -ARG LIBXML2_VERSION="2.9.13+dfsg-1ubuntu0.4" -ARG PYTHON_VERSION="3.10.6-1~22.04" -ARG ZLIB_VERSION="1:1.2.11.dfsg-2ubuntu9.2" +ARG 
LIBBZ2_VERSION="1.0.8-*" +ARG LIBCURL_VERSION="7.81.0-*" +ARG LIBLZMA_VERSION="5.2.5-*" +ARG LIBXML2_VERSION="2.9.13+dfsg-*" +ARG PYTHON_VERSION="3.10.6-*" +ARG ZLIB_VERSION="1:1.2.11.dfsg-*" ARG RLIBDIR="/usr/local/stablelift-R" FROM rocker/r-ver:${R_VERSION} AS build