diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f1f1492..5e8eee65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] +### [Changed] +- Use `run_validate_PipeVal_with_metadata` to gate on validation +- Move index/dictionary file discovery to configuration stage +- Include all parameters from `default.config` in the README + --- ## [1.0.1] - 2024-05-29 diff --git a/README.md b/README.md index 60da9e5c..c78c6646 100644 --- a/README.md +++ b/README.md @@ -150,12 +150,31 @@ For normal-only or tumour-only samples, exclude the fields for the other state. | `bundle_v0_dbsnp138_vcf_gz` | Yes | path | Absolute path to dbsnp file, e.g., `/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz` | | `bundle_contest_hapmap_3p3_vcf_gz` | Yes | path | Absolute path to HapMap 3.3 biallelic sites file, e.g., `/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz` | | `work_dir` | optional | path | Path of working directory for Nextflow. When included in the sample config file, Nextflow intermediate files and logs will be saved to this directory. With ucla_cds, the default is `/scratch` and should only be changed for testing/development. Changing this directory to `/hot` or `/tmp` can lead to high server latency and potential disk space limitations, respectively. | -| `docker_container_registry` | optional | string | Registry containing tool Docker images. Default: `ghcr.io/uclahs-cds` | -| `metapipeline_delete_input_bams` | optional | boolean | Set to true to delete the input BAM files once the initial processing step is complete. **WARNING**: This option should NOT be used for individual runs of recalibate-BAM; it's intended for metapipeline-DNA to optimize disk space usage by removing files that are no longer needed from the `workDir`. 
| -| `metapipeline_final_output_dir` | optional | string | Absolute path for the final output directory of metapipeline-DNA that's expected to contain the output BAM from align-DNA. **WARNING**: This option should not be used for individual runs of recalibrate-BAM; it's intended for metapipeline-DNA to optimize disk space usage. | -| `metapipeline_states_to_delete` | optional | list | List of states for which to delete input BAMs. **WARNING**: This option should not be used for individual runs of recalibrate-BAM; it's intended for metapipeline-DNA to optimize disk space usage. | | `base_resource_update` | optional | namespace | Namespace of parameters to update base resource allocations in the pipeline. Usage and structure are detailed in `template.config` and below. | + +The parameters below have default values defined in [`default.config`](./config/default.config) and generally do not need to be set by the user. + +| Optional Parameter | Type | Description | +| :------------------| :----| :-----------| +| `metapipeline_delete_input_bams` | boolean | Set to true to delete the input BAM files once the initial processing step is complete. **WARNING**: This option should NOT be used for individual runs of recalibrate-BAM; it's intended for metapipeline-DNA to optimize disk space usage by removing files that are no longer needed from the `workDir`. | +| `metapipeline_final_output_dir` | string | Absolute path for the final output directory of metapipeline-DNA that's expected to contain the output BAM from align-DNA. **WARNING**: This option should not be used for individual runs of recalibrate-BAM; it's intended for metapipeline-DNA to optimize disk space usage. | +| `metapipeline_states_to_delete` | list | List of states for which to delete input BAMs. **WARNING**: This option should not be used for individual runs of recalibrate-BAM; it's intended for metapipeline-DNA to optimize disk space usage. 
| +| `cache_intermediate_pipeline_steps` | boolean | Enable process caching from Nextflow. | +| `ucla_cds` | boolean | Overwrite default memory and CPU values by cluster-specific configs. | +| `docker_container_registry` | string | Registry containing tool Docker images. | +| `docker_image_gatk`, `gatk_version` | string | Docker image name and version for GATK. | +| `docker_image_pipeval`, `pipeval_version` | string | Docker image name and version for PipeVal. | +| `docker_image_gatk3`, `gatk3_version` | string | Docker image name and version for GATK3. | +| `docker_image_picard`, `picard_version` | string | Docker image name and version for Picard. | +| `docker_image_samtools`, `samtools_version` | string | Docker image name and version for SAMtools. | +| `reference_fasta_fai`, `reference_fasta_dict` | path | Index and dictionary files for the required input. Default: Matching `.fai` and `.dict` files in the same directory. | +| `bundle_v0_dbsnp138_vcf_gz_tbi` | path | Index file for the required input. Default: Matching `.tbi` file in the same directory. | +| `bundle_known_indels_vcf_gz_tbi` | path | Index file for the required input. Default: Matching `.tbi` file in the same directory. | +| `bundle_contest_hapmap_3p3_vcf_gz_tbi`| path | Index file for the required input. Default: Matching `.tbi` file in the same directory. | +| `bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi` | path | Index file for the required input. Default: Matching `.tbi` file in the same directory. | + + #### Base resource allocation updaters To update the base resource (cpus or memory) allocations for processes, use the following structure and add the necessary parts. 
The default allocations can be found in the [node-specific config files](./config/) ```Nextflow diff --git a/config/default.config b/config/default.config index 1ad304cb..84bf5183 100644 --- a/config/default.config +++ b/config/default.config @@ -1,6 +1,8 @@ import nextflow.util.SysHelper +import nextflow.Nextflow // Default inputs/parameters of the pipeline + params { min_cpus = 1 min_memory = 1.MB @@ -30,6 +32,14 @@ params { docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}" gatk_ir_compression = 1 + + // These parameters are inferred from the input files. The user can override them in the config file if required. + reference_fasta_fai = "${-> params.reference_fasta}.fai" + reference_fasta_dict = "${-> Nextflow.file(params.reference_fasta).resolveSibling(Nextflow.file(params.reference_fasta).getBaseName() + '.dict')}" + bundle_known_indels_vcf_gz_tbi = "${-> params.bundle_known_indels_vcf_gz}.tbi" + bundle_contest_hapmap_3p3_vcf_gz_tbi = "${-> params.bundle_contest_hapmap_3p3_vcf_gz}.tbi" + bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi = "${-> params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz}.tbi" + bundle_v0_dbsnp138_vcf_gz_tbi = "${-> params.bundle_v0_dbsnp138_vcf_gz}.tbi" } // Process specific scope diff --git a/config/schema.yaml b/config/schema.yaml index 0449f0da..fbfcee75 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -56,26 +56,56 @@ reference_fasta: mode: 'r' required: true help: 'Absolute path to reference genome fasta' +reference_fasta_fai: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to reference genome fasta index file' +reference_fasta_dict: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to reference genome fasta dictionary' bundle_mills_and_1000g_gold_standard_indels_vcf_gz: type: 'Path' mode: 'r' required: true help: 'Absolute path to Mills and 1000g gold standard INDELs VCF' 
+bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to Mills and 1000g gold standard INDELs VCF index file' bundle_known_indels_vcf_gz: type: 'Path' mode: 'r' required: true help: 'Absolute path to known INDELs VCF' +bundle_known_indels_vcf_gz_tbi: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to known INDELs VCF index file' bundle_v0_dbsnp138_vcf_gz: type: 'Path' mode: 'r' required: true help: 'Absolute path to v0 dbSNP 138 VCF' +bundle_v0_dbsnp138_vcf_gz_tbi: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to v0 dbSNP 138 VCF index file' bundle_contest_hapmap_3p3_vcf_gz: type: 'Path' mode: 'r' required: true help: 'Absolute path to ConEst HapMap 3p3 VCF' +bundle_contest_hapmap_3p3_vcf_gz_tbi: + type: 'Path' + mode: 'r' + required: true + help: 'Absolute path to ContEst HapMap 3p3 VCF index file' metapipeline_delete_input_bams: type: 'Bool' required: true diff --git a/main.nf b/main.nf index 82935d78..3afed0f8 100644 --- a/main.nf +++ b/main.nf @@ -25,7 +25,7 @@ Current Configuration: intervals: ${(params.is_targeted) ?: 'WGS'} Recalibration tables: ${params.input.recalibration_table} - - output: + - output: output: ${params.output_dir} output_dir_base: ${params.output_dir_base} log_output_dir: ${params.log_output_dir} @@ -43,7 +43,7 @@ Starting workflow... 
------------------------------------ """ -include { run_validate_PipeVal } from './external/pipeline-Nextflow-module/modules/PipeVal/validate/main.nf' addParams( +include { run_validate_PipeVal_with_metadata } from './external/pipeline-Nextflow-module/modules/PipeVal/validate/main.nf' addParams( options: [ docker_image_version: params.pipeval_version, main_process: "./" //Save logs in /process-log/run_validate_PipeVal @@ -82,51 +82,46 @@ workflow { /** * Input channel processing */ - Channel.from(params.samples_to_process) - .map{ sample -> ['index': indexFile(sample.path)] + sample } - .set{ input_ch_samples_with_index } - - input_ch_samples_with_index - .map{ sample -> [sample.path, sample.index] } - .flatten() - .set{ input_ch_validate } - - input_ch_samples_with_index - .map{ sample -> sample.id } - .flatten() - .set{ input_ch_sample_ids } - - input_ch_samples_with_index - .reduce( ['bams': [], 'indices': []] ){ a, b -> - a.bams.add(b.path); - a.indices.add(b.index); - return a - } - .set{ input_ch_collected_files } - /** * Input validation */ - run_validate_PipeVal(input_ch_validate) + Channel.from(params.samples_to_process) + .flatMap { sample -> + def all_metadata = sample.findAll { it.key != "path" } + return [ + [sample.path, [all_metadata, "path"]], + [indexFile(sample.path), [[id: sample.id], "index"]] + ] + } | run_validate_PipeVal_with_metadata - run_validate_PipeVal.out.validation_result + run_validate_PipeVal_with_metadata.out.validation_result .collectFile( name: 'input_validation.txt', storeDir: "${params.output_dir_base}/validation" ) + run_validate_PipeVal_with_metadata.out.validated_file + .map { filename, metadata -> [metadata[0].id, metadata[0] + [(metadata[1]): filename]] } + .groupTuple() + .map { it[1].inject([:]) { result, i -> result + i } } + .set { validated_samples_with_index } + + // The elements of validated_samples_with_index are the same as + // params.samples_to_process, with the following changes: + // * sample.path is the 
validated BAM file + // * sample.index is the validated BAI file (new key) /** * Interval extraction and splitting */ - extract_GenomeIntervals("${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict") + extract_GenomeIntervals(params.reference_fasta_dict) run_SplitIntervals_GATK( extract_GenomeIntervals.out.genomic_intervals, params.reference_fasta, - "${params.reference_fasta}.fai", - "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict" + params.reference_fasta_fai, + params.reference_fasta_dict ) run_SplitIntervals_GATK.out.interval_list @@ -143,18 +138,22 @@ workflow { /** * Indel realignment */ - input_ch_collected_files + validated_samples_with_index + .reduce( ['bams': [], 'indices': []] ){ a, b -> + a.bams.add(b.path); + a.indices.add(b.index); + return a + } .combine(input_ch_intervals) .map{ it -> it[0] + it[1] } .set{ input_ch_indel_realignment } realign_indels(input_ch_indel_realignment) - /** * Input file deletion */ - input_ch_samples_with_index + validated_samples_with_index .filter{ params.metapipeline_states_to_delete.contains(it.sample_type) } .map{ sample -> sample.path } .flatten() @@ -179,7 +178,7 @@ workflow { */ recalibrate_base( realign_indels.out.output_ch_realign_indels, - input_ch_sample_ids + validated_samples_with_index.map{ sample -> sample.id }.flatten() ) @@ -206,21 +205,21 @@ workflow { run_GetPileupSummaries_GATK( params.reference_fasta, - "${params.reference_fasta}.fai", - "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", + params.reference_fasta_fai, + params.reference_fasta_dict, params.bundle_contest_hapmap_3p3_vcf_gz, - "${params.bundle_contest_hapmap_3p3_vcf_gz}.tbi", + params.bundle_contest_hapmap_3p3_vcf_gz_tbi, input_ch_summary_intervals, input_ch_merged_bams ) - input_ch_samples_with_index + validated_samples_with_index .filter{ it.sample_type == 'normal' } .map{ it -> [sanitize_string(it.id)] } 
.join(run_GetPileupSummaries_GATK.out.pileupsummaries) .set{ normal_pileupsummaries } - input_ch_samples_with_index + validated_samples_with_index .filter{ it.sample_type == 'tumor' } .map{ it -> [sanitize_string(it.id)] } .join(run_GetPileupSummaries_GATK.out.pileupsummaries) @@ -254,8 +253,8 @@ workflow { run_DepthOfCoverage_GATK( params.reference_fasta, - "${params.reference_fasta}.fai", - "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", + params.reference_fasta_fai, + params.reference_fasta_dict, input_ch_summary_intervals, input_ch_merged_bams ) diff --git a/module/base-recalibration.nf b/module/base-recalibration.nf index e061591a..d6838568 100644 --- a/module/base-recalibration.nf +++ b/module/base-recalibration.nf @@ -15,8 +15,8 @@ include { reference_fasta: path to reference genome fasta file reference_fasta_fai: path to index for reference fasta reference_fasta_dict: path to dictionary for reference fasta - bundle_mills_and_1000g_gold_standards_vcf_gz: path to standard Mills and 1000 genomes variants - bundle_mills_and_1000g_gold_standards_vcf_gz_tbi: path to index file for Mills and 1000g variants + bundle_mills_and_1000g_gold_standard_indels_vcf_gz: path to standard Mills and 1000 genomes variants + bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi: path to index file for Mills and 1000g variants bundle_known_indels_vcf_gz: path to set of known indels bundle_known_indels_vcf_gz_tbi: path to index of known indels VCF bundle_v0_dbsnp138_vcf_gz: path to dbSNP variants @@ -46,8 +46,8 @@ process run_BaseRecalibrator_GATK { path(reference_fasta) path(reference_fasta_fai) path(reference_fasta_dict) - path(bundle_mills_and_1000g_gold_standards_vcf_gz) - path(bundle_mills_and_1000g_gold_standards_vcf_gz_tbi) + path(bundle_mills_and_1000g_gold_standard_indels_vcf_gz) + path(bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi) path(bundle_known_indels_vcf_gz) path(bundle_known_indels_vcf_gz_tbi) 
path(bundle_v0_dbsnp138_vcf_gz) @@ -71,7 +71,7 @@ process run_BaseRecalibrator_GATK { ${all_ir_bams} \ --reference ${reference_fasta} \ --verbosity INFO \ - --known-sites ${bundle_mills_and_1000g_gold_standards_vcf_gz} \ + --known-sites ${bundle_mills_and_1000g_gold_standard_indels_vcf_gz} \ --known-sites ${bundle_known_indels_vcf_gz} \ --known-sites ${bundle_v0_dbsnp138_vcf_gz} \ --output ${sample_id}_recalibration_table.grp \ @@ -182,14 +182,14 @@ workflow recalibrate_base { run_BaseRecalibrator_GATK( params.reference_fasta, - "${params.reference_fasta}.fai", - "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", + params.reference_fasta_fai, + params.reference_fasta_dict, params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz, - "${params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz}.tbi", + params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi, params.bundle_known_indels_vcf_gz, - "${params.bundle_known_indels_vcf_gz}.tbi", + params.bundle_known_indels_vcf_gz_tbi, params.bundle_v0_dbsnp138_vcf_gz, - "${params.bundle_v0_dbsnp138_vcf_gz}.tbi", + params.bundle_v0_dbsnp138_vcf_gz_tbi, base_recalibrator_intervals, params.input.recalibration_table, input_ch_base_recalibrator @@ -220,8 +220,8 @@ workflow recalibrate_base { run_ApplyBQSR_GATK( params.reference_fasta, - "${params.reference_fasta}.fai", - "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", + params.reference_fasta_fai, + params.reference_fasta_dict, input_ch_apply_bqsr ) diff --git a/module/indel-realignment.nf b/module/indel-realignment.nf index afce5a0a..a34674fc 100644 --- a/module/indel-realignment.nf +++ b/module/indel-realignment.nf @@ -6,8 +6,8 @@ include { generate_standard_filename } from '../external/pipeline-Nextflow-modul reference_fasta: path to reference genome fasta file reference_fasta_fai: path to index for reference fasta reference_fasta_dict: path to dictionary for reference fasta - 
bundle_mills_and_1000g_gold_standards_vcf_gz: path to standard Mills and 1000 genomes variants - bundle_mills_and_1000g_gold_standards_vcf_gz_tbi: path to index file for Mills and 1000g variants + bundle_mills_and_1000g_gold_standard_indels_vcf_gz: path to standard Mills and 1000 genomes variants + bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi: path to index file for Mills and 1000g variants bundle_known_indels_vcf_gz: path to set of known indels bundle_known_indels_vcf_gz_tbi: path to index of known indels VCF (bam, bam_index, interval_id, interval): @@ -34,8 +34,8 @@ process run_RealignerTargetCreator_GATK { path(reference_fasta) path(reference_fasta_fai) path(reference_fasta_dict) - path(bundle_mills_and_1000g_gold_standards_vcf_gz) - path(bundle_mills_and_1000g_gold_standards_vcf_gz_tbi) + path(bundle_mills_and_1000g_gold_standard_indels_vcf_gz) + path(bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi) path(bundle_known_indels_vcf_gz) path(bundle_known_indels_vcf_gz_tbi) path(original_intervals) @@ -56,7 +56,7 @@ process run_RealignerTargetCreator_GATK { --analysis_type RealignerTargetCreator \ ${arg_bam} \ --reference_sequence ${reference_fasta} \ - --known ${bundle_mills_and_1000g_gold_standards_vcf_gz} \ + --known ${bundle_mills_and_1000g_gold_standard_indels_vcf_gz} \ --known ${bundle_known_indels_vcf_gz} \ --intervals ${interval} \ --out ${output_rtc_intervals} \ @@ -74,8 +74,8 @@ process run_RealignerTargetCreator_GATK { reference_fasta: path to reference genome fasta file reference_fasta_fai: path to index for reference fasta reference_fasta_dict: path to dictionary for reference fasta - bundle_mills_and_1000g_gold_standards_vcf_gz: path to standard Mills and 1000 genomes variants - bundle_mills_and_1000g_gold_standards_vcf_gz_tbi: path to index file for Mills and 1000g variants + bundle_mills_and_1000g_gold_standard_indels_vcf_gz: path to standard Mills and 1000 genomes variants + bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi: 
path to index file for Mills and 1000g variants bundle_known_indels_vcf_gz: path to set of known indels bundle_known_indels_vcf_gz_tbi: path to index of known indels VCF (bam, bam_index, interval_id, interval, RTC_interval): @@ -103,8 +103,8 @@ process run_IndelRealigner_GATK { path(reference_fasta) path(reference_fasta_fai) path(reference_fasta_dict) - path(bundle_mills_and_1000g_gold_standards_vcf_gz) - path(bundle_mills_and_1000g_gold_standards_vcf_gz_tbi) + path(bundle_mills_and_1000g_gold_standard_indels_vcf_gz) + path(bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi) path(bundle_known_indels_vcf_gz) path(bundle_known_indels_vcf_gz_tbi) tuple path(bam), path(bam_index), val(interval_id), path(scatter_intervals), path(target_intervals_RTC) @@ -134,7 +134,7 @@ process run_IndelRealigner_GATK { ${arg_bam} \ --reference_sequence ${reference_fasta} \ --bam_compression ${params.gatk_ir_compression} \ - --knownAlleles ${bundle_mills_and_1000g_gold_standards_vcf_gz} \ + --knownAlleles ${bundle_mills_and_1000g_gold_standard_indels_vcf_gz} \ --knownAlleles ${bundle_known_indels_vcf_gz} \ --allow_potentially_misencoded_quality_scores \ --targetIntervals ${target_intervals_RTC} \ @@ -161,24 +161,24 @@ workflow realign_indels { run_RealignerTargetCreator_GATK( params.reference_fasta, - "${params.reference_fasta}.fai", - "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", + params.reference_fasta_fai, + params.reference_fasta_dict, params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz, - "${params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz}.tbi", + params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi, params.bundle_known_indels_vcf_gz, - "${params.bundle_known_indels_vcf_gz}.tbi", + params.bundle_known_indels_vcf_gz_tbi, "${params.getOrDefault('intervals', null) ?: params.work_dir + '/NO_FILE.bed'}", input_ch_rtc ) run_IndelRealigner_GATK( params.reference_fasta, - "${params.reference_fasta}.fai", - 
"${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", + params.reference_fasta_fai, + params.reference_fasta_dict, params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz, - "${params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz}.tbi", + params.bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi, params.bundle_known_indels_vcf_gz, - "${params.bundle_known_indels_vcf_gz}.tbi", + params.bundle_known_indels_vcf_gz_tbi, run_RealignerTargetCreator_GATK.out.ir_targets ) diff --git a/module/split-intervals.nf b/module/split-intervals.nf index 7f484ab2..f401aa26 100644 --- a/module/split-intervals.nf +++ b/module/split-intervals.nf @@ -3,9 +3,9 @@ input: intervals: path to set of target intervals to split - reference: path to reference genome fasta file - reference_index: path to index for reference fasta - reference_dict: path to dictionary for reference fasta + reference_fasta: path to reference genome fasta file + reference_fasta_index: path to index for reference fasta + reference_fasta_dict: path to dictionary for reference fasta params: params.output_dir_base: string(path) @@ -23,9 +23,9 @@ process run_SplitIntervals_GATK { input: path intervals - path reference - path reference_index - path reference_dict + path reference_fasta + path reference_fasta_index + path reference_fasta_dict output: path "*-contig.interval_list", emit: interval_list @@ -39,7 +39,7 @@ process run_SplitIntervals_GATK { for i in `grep -E '^(chr|)([0-9]+|X|Y|M)\$' ${intervals}` do gatk SplitIntervals \ - -R ${reference} \ + -R ${reference_fasta} \ -L ${intervals} \ -L \$i \ --interval-set-rule INTERSECTION \ @@ -51,7 +51,7 @@ process run_SplitIntervals_GATK { done gatk SplitIntervals \ - -R ${reference} \ + -R ${reference_fasta} \ -L ${intervals} \ \$assembled_chr_to_exclude \ --interval-set-rule INTERSECTION \ @@ -61,7 +61,7 @@ process run_SplitIntervals_GATK { mv 0000-scattered.interval_list nonassembled-contig.interval_list else gatk 
SplitIntervals \ - -R ${reference} \ + -R ${reference_fasta} \ -L ${intervals} \ --scatter-count ${params.scatter_count} \ ${params.split_intervals_extra_args} \ diff --git a/test/configtest-F16.json b/test/configtest-F16.json index e84f2742..0b2ea0e4 100644 --- a/test/configtest-F16.json +++ b/test/configtest-F16.json @@ -53,9 +53,13 @@ "aligner": "BWA-MEM2-2.2.1", "blcds_registered_dataset": false, "bundle_contest_hapmap_3p3_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz", + "bundle_contest_hapmap_3p3_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz.tbi", "bundle_known_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz", + "bundle_known_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi", "bundle_mills_and_1000g_gold_standard_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", + "bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", "bundle_v0_dbsnp138_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz", + "bundle_v0_dbsnp138_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz.tbi", "cache_intermediate_pipeline_steps": false, "dataset_id": "A-mini", "docker_container_registry": "ghcr.io/uclahs-cds", @@ -225,6 +229,8 @@ } }, "reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "reference_fasta_dict": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.dict", + "reference_fasta_fai": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta.fai", "samples_to_process": [ { "id": "4915723", 
diff --git a/test/configtest-F32.json b/test/configtest-F32.json index 107168a3..16fd04d0 100644 --- a/test/configtest-F32.json +++ b/test/configtest-F32.json @@ -53,9 +53,13 @@ "aligner": "BWA-MEM2-2.2.1", "blcds_registered_dataset": false, "bundle_contest_hapmap_3p3_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz", + "bundle_contest_hapmap_3p3_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz.tbi", "bundle_known_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz", + "bundle_known_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi", "bundle_mills_and_1000g_gold_standard_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", + "bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", "bundle_v0_dbsnp138_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz", + "bundle_v0_dbsnp138_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz.tbi", "cache_intermediate_pipeline_steps": false, "dataset_id": "A-mini", "docker_container_registry": "ghcr.io/uclahs-cds", @@ -225,6 +229,8 @@ } }, "reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "reference_fasta_dict": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.dict", + "reference_fasta_fai": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta.fai", "samples_to_process": [ { "id": "4915723",