Skip to content

Commit

Permalink
VCF conversion fixes
Browse files Browse the repository at this point in the history
- handle non-default column order
- handle case where REF is longer than ALT and ALT length != 1
  • Loading branch information
inodb committed May 5, 2020
1 parent 5c55c7b commit 958e5ef
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 10 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "genome-nexus-cli",
"version": "0.0.11",
"version": "0.0.12",
"description": "Genome Nexus Command Line Interface",
"main": "./bin/genome-nexus",
"repository": "https://github.com/genome-nexus/genome-nexus-cli",
Expand Down
57 changes: 48 additions & 9 deletions src/convert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,36 @@ export type MAFRecord = {
};

export function convertVCFtoMAF(inputFile: string) {
// default VCF index of columns
let column_nr = {
CHROM: 0,
POS: 1,
REF: 3,
ALT: 4,
};

console.log(
'Chromosome\tStart_Position\tEnd_Position\tReference_Allele\tTumor_Seq_Allele2'
);
lineReader.eachLine(inputFile, function(line) {
if (!line.startsWith('#')) {
if (line.startsWith('#CHROM')) {
// handle non default order of columns
const fields = line.substring(1).split('\t');

let i = 0;
for (let field of fields) {
if (Object.keys(column_nr).includes(field)) {
column_nr[field] = i;
}
i++;
}
} else if (!line.startsWith('#')) {
const fields = line.split('\t');
const MafRecord = convertVCFRecordToMAFRecord({
CHROM: fields[0],
POS: parseInt(fields[1]),
REF: fields[3],
ALT: fields[4],
CHROM: fields[column_nr['CHROM']],
POS: parseInt(fields[column_nr['POS']]),
REF: fields[column_nr['REF']],
ALT: fields[column_nr['ALT']],
});
console.log(
`${MafRecord.Chromosome}\t${MafRecord.Start_Position}\t${MafRecord.End_Position}\t${MafRecord.Reference_Allele}\t${MafRecord.Tumor_Seq_Allele2}`
Expand All @@ -40,15 +59,35 @@ export function convertVCFRecordToMAFRecord(input: VCFRecord): MAFRecord {
return {
Chromosome: input.CHROM,
Start_Position: input.POS,
End_Position: input.POS,
End_Position: input.POS + (input.REF.length - 1),
Reference_Allele: input.REF,
Tumor_Seq_Allele2: input.ALT,
};
} else if (input.REF.length > input.ALT.length) {
if (input.ALT.length !== 1) {
throw new Error(
`VCF Record parsing error: unexpected ALT length\n${input}`
);
// find longest common prefix and remove
let longestCommonPrefix = '';
let i = 0;
for (let c of input.ALT) {
if (c === input.REF[i]) {
longestCommonPrefix += c;
i++;
} else {
break;
}
}

const mafRef = input.REF.substring(longestCommonPrefix.length);
const mafAlt = input.ALT.substring(longestCommonPrefix.length);
const mafStartPos = input.POS + longestCommonPrefix.length;
const mafEndPos = mafStartPos + mafRef.length - 1;
return {
Chromosome: input.CHROM,
Start_Position: mafStartPos,
End_Position: mafEndPos,
Reference_Allele: mafRef,
Tumor_Seq_Allele2: mafAlt,
};
} else if (input.REF[0] !== input.ALT) {
throw new Error(
`VCF Record parsing error: unexpected REF/ALT combo\n${input}`
Expand Down
2 changes: 2 additions & 0 deletions test/data/complex_indel.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#CHROM POS REF ALT
1 2488122 TGGGGGC TGGGGT

0 comments on commit 958e5ef

Please sign in to comment.