From f60252396235b72fb5706dcd713893728457a67c Mon Sep 17 00:00:00 2001 From: Petr Danecek Date: Sat, 16 Nov 2024 16:16:58 +0100 Subject: [PATCH] Fix a bug in genotype filtering An empty genotype field (".") following a non-empty field (e.g. "0/1") was not fully reset at the next site, causing the program to crash. Resolves #2314 --- filter.c | 15 +++++---------- test/query.filter.14.1.out | 2 ++ test/query.filter.14.2.out | 1 + test/query.filter.14.3.out | 1 + test/query.filter.14.vcf | 6 ++++++ test/test.pl | 3 +++ 6 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 test/query.filter.14.1.out create mode 100644 test/query.filter.14.2.out create mode 100644 test/query.filter.14.3.out create mode 100644 test/query.filter.14.vcf diff --git a/filter.c b/filter.c index 0a0e1387..dbf3a8a5 100644 --- a/filter.c +++ b/filter.c @@ -1191,12 +1191,9 @@ static void filters_set_format_string(filter_t *flt, bcf1_t *line, token_t *tok) } static void _filters_set_genotype(filter_t *flt, bcf1_t *line, token_t *tok, int type) { + tok->nvalues = tok->str_value.l = 0; bcf_fmt_t *fmt = bcf_get_fmt(flt->hdr, line, "GT"); - if ( !fmt ) - { - tok->nvalues = tok->str_value.l = 0; - return; - } + if ( !fmt ) return; int i,j, nsmpl = bcf_hdr_nsamples(flt->hdr), nvals1 = type==2 ? 3 : 4; if ( tok->str_value.m <= nvals1*nsmpl ) @@ -1276,12 +1273,10 @@ static void filters_set_genotype4(filter_t *flt, bcf1_t *line, token_t *tok) { _ static void filters_set_genotype_string(filter_t *flt, bcf1_t *line, token_t *tok) { + tok->nvalues = tok->str_value.l = 0; bcf_fmt_t *fmt = bcf_get_fmt(flt->hdr, line, "GT"); - if ( !fmt ) - { - tok->nvalues = 0; - return; - } + if ( !fmt ) return; + int i, blen = 4, nsmpl = line->n_sample; gt_length_too_big: diff --git a/test/query.filter.14.1.out b/test/query.filter.14.1.out new file mode 100644 index 00000000..01a222d9 --- /dev/null +++ b/test/query.filter.14.1.out @@ -0,0 +1,2 @@ +chr1:1 SM 0|1 +chr1:2 SM . 
diff --git a/test/query.filter.14.2.out b/test/query.filter.14.2.out new file mode 100644 index 00000000..928dc259 --- /dev/null +++ b/test/query.filter.14.2.out @@ -0,0 +1 @@ +chr1:2 SM . diff --git a/test/query.filter.14.3.out b/test/query.filter.14.3.out new file mode 100644 index 00000000..4f8ac762 --- /dev/null +++ b/test/query.filter.14.3.out @@ -0,0 +1 @@ +chr1:1 SM 0|1 diff --git a/test/query.filter.14.vcf b/test/query.filter.14.vcf new file mode 100644 index 00000000..2850cfe8 --- /dev/null +++ b/test/query.filter.14.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SM +chr1 1 . A C . . . GT 0|1 +chr1 2 . G T . . . . . diff --git a/test/test.pl b/test/test.pl index 04637bab..57124dd7 100755 --- a/test/test.pl +++ b/test/test.pl @@ -112,6 +112,9 @@ run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.5.a','merge.gvcf.5.b'],out=>'merge.gvcf.5.1.out',args=>'--gvcf - --merge none'); run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.11.a','merge.gvcf.11.b','merge.gvcf.11.c'],out=>'merge.gvcf.11.1.out',args=>'--gvcf -'); # run_test(\&test_vcf_merge_big,$opts,in=>'merge_big.1',out=>'merge_big.1.1',nsmpl=>79000,nfiles=>79,nalts=>486,args=>''); # commented out for speed +run_test(\&test_vcf_query,$opts,in=>'query.filter.14',out=>'query.filter.14.1.out',args=>q[-f '%CHROM:%POS [ %SAMPLE %GT]']); +run_test(\&test_vcf_query,$opts,in=>'query.filter.14',out=>'query.filter.14.2.out',args=>q[-f '%CHROM:%POS [ %SAMPLE %GT]' -i'GT="."']); +run_test(\&test_vcf_query,$opts,in=>'query.filter.14',out=>'query.filter.14.3.out',args=>q[-f '%CHROM:%POS [ %SAMPLE %GT]' -i'GT="0|1"']); run_test(\&test_vcf_query,$opts,in=>'query.func.1',out=>'query.func.1.1.out',args=>q[-f '%CHROM:%POS\\t%INFO/AD\\t%SUM(INFO/AD)']); run_test(\&test_vcf_query,$opts,in=>'query.func.1',out=>'query.func.1.2.out',args=>q[-f '%CHROM:%POS\\t[%AD ]\\t%SUM(FORMAT/AD)']); 
run_test(\&test_vcf_query,$opts,in=>'query.func.1',out=>'query.func.1.3.out',args=>q[-f '%CHROM:%POS\\t[%AD ]\\t[ %SUM(FORMAT/AD)]']);