From 0ea5423e9284fafe8d366fcd504c321e96869acb Mon Sep 17 00:00:00 2001 From: Dennis Hendriksen Date: Thu, 14 Sep 2023 15:35:22 +0200 Subject: [PATCH] Fix: gvcf workflow results might be invalid for multi-sample projects (#465) --- config/nxf.config | 2 +- modules/gvcf/utils.nf | 43 ------------------------------------------- vip_gvcf.nf | 2 +- 3 files changed, 2 insertions(+), 45 deletions(-) delete mode 100644 modules/gvcf/utils.nf diff --git a/config/nxf.config b/config/nxf.config index 853366213..ba0302fb6 100644 --- a/config/nxf.config +++ b/config/nxf.config @@ -1,5 +1,5 @@ env { - VIP_VERSION = "6.0.0" + VIP_VERSION = "6.0.1" TMPDIR = "\${TMPDIR:-\${NXF_TEMP:-\$(mktemp -d)}}" APPTAINER_BIND = "${APPTAINER_BIND}" diff --git a/modules/gvcf/utils.nf b/modules/gvcf/utils.nf deleted file mode 100644 index e2b4eca1c..000000000 --- a/modules/gvcf/utils.nf +++ /dev/null @@ -1,43 +0,0 @@ -include { parseFastaIndex } from '../utils' - -def determineChunks(meta) { - def fastaContigs = parseFastaIndex(params[meta.project.assembly].reference.fastaFai).collectEntries { record -> [record.contig, record] } - def records = meta.sample.gvcf.stats.readLines().collect { line -> line.split('\t') } - - int chunkSize = 10000 - int maxNrRecords = records.size() > 0 ? Math.max((records.max { record -> record[2] as int })[2] as int, chunkSize) : chunkSize - - int regionNrRecords=0 - def regions=[] - def chunks=[] - records.each { record -> - def vcfContig = record[0] - def fastaContig = fastaContigs[vcfContig] - if(!fastaContig) { - def fasta = params[meta.project.assembly].reference.fasta - throw new IllegalArgumentException("vcf chromosome '${vcfContig}' does not exist in reference genome '${fasta}' (assembly '${meta.project.assembly}'). are you using the correct reference genome?") - } - int contigNrRecords = record[2] as int - if(regionNrRecords + contigNrRecords <= maxNrRecords) { - regions.add([chrom: fastaContig.contig, chromStart: 0, chromEnd: fastaContig.size]) - regionNrRecords += contigNrRecords - } - else { - chunks.add(regions) - regions=[] - regions.add([chrom: fastaContig.contig, chromStart: 0, chromEnd: fastaContig.size]) - regionNrRecords = contigNrRecords - } - } - if(regions.size > 0) { - chunks.add(regions) - } - - return chunks -} - -def scatter(meta) { - def chunks = determineChunks(meta) - def index = 0 - return !chunks.isEmpty() ? chunks.collect(chunk -> [*:meta, chunk: [index: index++, regions: chunk, total: chunks.size()] ]) : [[*:meta, chunk: [index: 0, regions: [], total: 0] ]] -} \ No newline at end of file diff --git a/vip_gvcf.nf b/vip_gvcf.nf index 05dc846ad..5f5be8cc3 100644 --- a/vip_gvcf.nf +++ b/vip_gvcf.nf @@ -3,7 +3,7 @@ nextflow.enable.dsl=2 include { parseCommonSampleSheet; getAssemblies } from './modules/sample_sheet' include { getCramRegex; getGenomeVcfRegex } from './modules/utils' include { validate } from './modules/gvcf/validate' -include { scatter } from './modules/gvcf/utils' +include { scatter } from './modules/utils' include { merge } from './modules/gvcf/merge' include { vcf; validateVcfParams } from './vip_vcf'