Skip to content

Commit

Permalink
Check for number of columns in --custom_vcf (#25)
Browse files (browse the repository at this point in the history)
* Accepts 8-10 columns in the --custom_vcf file; the column count is auto-detected from the VCF header and used when parsing the bedtools intersect file
  • Loading branch information
lcoombe authored Jul 24, 2024
1 parent 80a94f3 commit 8dbac4d
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions ntroot_cross_reference_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,16 +181,23 @@ def print_vcf_line(ntedit_vcf, l_vcf, outfile):
f"{ntedit_vcf.integration}")
outfile.write(f"{out_str}\n")

def parse_bedtools_loj(infile, outfile, strip_info=False):
def parse_bedtools_loj(infile, outfile, header, strip_info=False):
"Parse the LOJ from bedtools to ntEdit-formatted VCF"
ntedit_vcf = None
l_vcf = None

num_col_vcf = len(header.split("\t")) # Standard single-sample VCF can have 8-10 columns
if num_col_vcf > 10 or num_col_vcf < 8:
message = (f"Expected 8-10 columns in VCF, got {num_col_vcf}. "
"Please ensure VCF is single-sample, and following "
"standard VCF specifications")
raise ValueError(message)

with open(infile, 'r', encoding="utf8") as fin:
for line in fin:
line = line.strip().split("\t")
ntedit_vcf_new = Vcf(*line[:10], parse_info=False, strip_info=strip_info)
l_vcf_new = Vcf(*line[10:18])
ntedit_vcf_new = Vcf(*line[:num_col_vcf], parse_info=False, strip_info=strip_info)
l_vcf_new = Vcf(*line[num_col_vcf:num_col_vcf+8]) # 8 columns in the -l VCF provided by ntRoot
if ntedit_vcf is not None and ntedit_vcf.position == ntedit_vcf_new.position \
and ntedit_vcf.chr == ntedit_vcf_new.chr:
# This is the same position as before, tally extra INFO from l_vcf.
Expand Down Expand Up @@ -322,7 +329,7 @@ def main():
write_header(args.vcf_l, fout, info_only=True)
fout.write(f"{header}\n")

parse_bedtools_loj(args.bedtools, fout, args.strip)
parse_bedtools_loj(args.bedtools, fout, header, args.strip)

refold_variants(f"{args.prefix}.tmp.vcf", args.prefix)

Expand Down

0 comments on commit 8dbac4d

Please sign in to comment.