From 958e5ef272b346726254e79efc97b6827a50e916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=94=A7=20Ino=20de=20Bruijn=20=F0=9F=A7=AC?= Date: Tue, 5 May 2020 11:35:48 -0400 Subject: [PATCH] VCF conversion fixes - handle non default column order - handle case where ref > alt and alt != 1 --- package.json | 2 +- src/convert.ts | 57 +++++++++++++++++++++++++++++++------ test/data/complex_indel.vcf | 2 ++ 3 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 test/data/complex_indel.vcf diff --git a/package.json b/package.json index 31ab534..1de4ce9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "genome-nexus-cli", - "version": "0.0.11", + "version": "0.0.12", "description": "Genome Nexus Command Line Interface", "main": "./bin/genome-nexus", "repository": "https://github.com/genome-nexus/genome-nexus-cli", diff --git a/src/convert.ts b/src/convert.ts index 357c214..11cdb10 100644 --- a/src/convert.ts +++ b/src/convert.ts @@ -16,17 +16,36 @@ export type MAFRecord = { }; export function convertVCFtoMAF(inputFile: string) { + // default VCF index of columns + let column_nr = { + CHROM: 0, + POS: 1, + REF: 3, + ALT: 4, + }; + console.log( 'Chromosome\tStart_Position\tEnd_Position\tReference_Allele\tTumor_Seq_Allele2' ); lineReader.eachLine(inputFile, function(line) { - if (!line.startsWith('#')) { + if (line.startsWith('#CHROM')) { + // handle non default order of columns + const fields = line.substring(1).split('\t'); + + let i = 0; + for (let field of fields) { + if (Object.keys(column_nr).includes(field)) { + column_nr[field] = i; + } + i++; + } + } else if (!line.startsWith('#')) { const fields = line.split('\t'); const MafRecord = convertVCFRecordToMAFRecord({ - CHROM: fields[0], - POS: parseInt(fields[1]), - REF: fields[3], - ALT: fields[4], + CHROM: fields[column_nr['CHROM']], + POS: parseInt(fields[column_nr['POS']]), + REF: fields[column_nr['REF']], + ALT: fields[column_nr['ALT']], }); console.log( `${MafRecord.Chromosome}\t${MafRecord.Start_Position}\t${MafRecord.End_Position}\t${MafRecord.Reference_Allele}\t${MafRecord.Tumor_Seq_Allele2}` @@ -40,15 +59,35 @@ export function convertVCFRecordToMAFRecord(input: VCFRecord): MAFRecord { return { Chromosome: input.CHROM, Start_Position: input.POS, - End_Position: input.POS, + End_Position: input.POS + (input.REF.length - 1), Reference_Allele: input.REF, Tumor_Seq_Allele2: input.ALT, }; } else if (input.REF.length > input.ALT.length) { if (input.ALT.length !== 1) { - throw new Error( - `VCF Record parsing error: unexpected ALT length\n${input}` - ); + // find longest common prefix and remove + let longestCommonPrefix = ''; + let i = 0; + for (let c of input.ALT) { + if (c === input.REF[i]) { + longestCommonPrefix += c; + i++; + } else { + break; + } + } + + const mafRef = input.REF.substring(longestCommonPrefix.length); + const mafAlt = input.ALT.substring(longestCommonPrefix.length); + const mafStartPos = input.POS + longestCommonPrefix.length; + const mafEndPos = mafStartPos + mafRef.length - 1; + return { + Chromosome: input.CHROM, + Start_Position: mafStartPos, + End_Position: mafEndPos, + Reference_Allele: mafRef, + Tumor_Seq_Allele2: mafAlt, + }; } else if (input.REF[0] !== input.ALT) { throw new Error( `VCF Record parsing error: unexpected REF/ALT combo\n${input}` diff --git a/test/data/complex_indel.vcf b/test/data/complex_indel.vcf new file mode 100644 index 0000000..91c79bd --- /dev/null +++ b/test/data/complex_indel.vcf @@ -0,0 +1,2 @@ +#CHROM POS REF ALT +1 2488122 TGGGGGC TGGGGT