Skip to content

Commit

Permalink
Make it possible to refer to the ID column from the FORMAT expression
Browse files Browse the repository at this point in the history
For example
    bcftools query test.vcf -f 'ID=%ID  ID=[ %/ID]  vs  FMT_ID=[ %ID]'

The same `%/TAG` syntax now also works for the other fixed VCF columns: CHROM, POS, REF, ALT, QUAL and FILTER.

Resolves #2337
  • Loading branch information
pd3 committed Jan 2, 2025
1 parent 83f0bbb commit 707336b
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 2 deletions.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ Changes affecting specific commands:
bcftools query test.vcf -f '%CHROM:%POS \t [ %AD] \t %SUM(FMT/AD)'
bcftools query test.vcf -f '%CHROM:%POS \t [ %AD] \t %SUM(INFO/AD)'

- Make it possible to refer to the ID column from the FORMAT expression (#2337)

bcftools query test.vcf -f 'ID=%ID ID=[ %/ID] vs FMT_ID=[ %ID]'

* bcftools roh

- New visualization tool misc/roh-viz, see below
Expand Down
17 changes: 15 additions & 2 deletions convert.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* convert.c -- functions for converting between VCF/BCF and related formats.
Copyright (C) 2013-2024 Genome Research Ltd.
Copyright (C) 2013-2025 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -1466,12 +1466,25 @@ static int parse_subscript(char **p)

static char *parse_tag(convert_t *convert, char *p, int is_gtf)
{
int is_vcf_column = p[1]=='/' ? 1 : 0;
if ( is_vcf_column ) p++;

char *q = ++p;
while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++;
kstring_t str = {0,0,0};
if ( q-p==0 ) error("Could not parse format string: %s\n", convert->format_str);
kputsn(p, q-p, &str);
if ( is_gtf )
if ( is_gtf && is_vcf_column )
{
_SET_NON_FORMAT_TAGS(register_tag, str.s, convert, str.s, is_gtf)
else if ( !strcmp(str.s, "ALT") )
{
fmt_t *fmt = register_tag(convert, str.s, is_gtf, T_ALT);
fmt->subscript = parse_subscript(&q);
}
else error("Could not parse tag: %s .. %s\n", str.s,convert->format_str);
}
else if ( is_gtf )
{
_SET_FILTER_EXPR(convert,set_filter_expr,p,q,1)
else if ( !strcmp(str.s, "SAMPLE") ) register_tag(convert, "SAMPLE", is_gtf, T_SAMPLE);
Expand Down
3 changes: 3 additions & 0 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3253,6 +3253,9 @@ Extracts fields from VCF or BCF files and outputs them in user-defined format.
bcftools query -f '[%SAMPLE %GT %DP\n]' -i 'FMT/DP=1 || FMT/DP=2' file.vcf
bcftools query -f '[%SAMPLE %GT %DP\n]' -i 'FMT/DP=1 | FMT/DP=2' file.vcf

# Refer to ID column vs INFO/ID tag vs FORMAT/ID tag
bcftools query -f 'columnID=%ID infoID=%INFO/ID [fmtID=%ID ] [columnID=%/ID]' file.vcf


[[reheader]]
=== bcftools reheader ['OPTIONS'] 'file.vcf.gz'
Expand Down
1 change: 1 addition & 0 deletions test/query.3.1.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 1000 ID G A 999 FILTER infoCHROM infoPOS infoID infoREF infoALT infoQUAL infoFILTER
1 change: 1 addition & 0 deletions test/query.3.2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fmtCHROM1 fmtCHROM2 fmtPOS1 fmtPOS2 fmtID1 fmtID2 fmtREF1 fmtREF2 fmtALT1 fmtALT2 fmtQUAL1 fmtQUAL2 fmtFILTER1 fmtFILTER2
1 change: 1 addition & 0 deletions test/query.3.3.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 1 1000 1000 ID ID G G A A 999 999 FILTER FILTER
20 changes: 20 additions & 0 deletions test/query.3.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
##fileformat=VCFv4.2
##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
##contig=<ID=1,assembly=b37,length=249250621>
##FILTER=<ID=FILTER,Description="">
##INFO=<ID=CHROM,Number=.,Type=String,Description="">
##INFO=<ID=POS,Number=.,Type=String,Description="">
##INFO=<ID=ID,Number=.,Type=String,Description="">
##INFO=<ID=REF,Number=.,Type=String,Description="">
##INFO=<ID=ALT,Number=.,Type=String,Description="">
##INFO=<ID=QUAL,Number=.,Type=String,Description="">
##INFO=<ID=FILTER,Number=.,Type=String,Description="">
##FORMAT=<ID=CHROM,Number=.,Type=String,Description="">
##FORMAT=<ID=POS,Number=.,Type=String,Description="">
##FORMAT=<ID=ID,Number=.,Type=String,Description="">
##FORMAT=<ID=REF,Number=.,Type=String,Description="">
##FORMAT=<ID=ALT,Number=.,Type=String,Description="">
##FORMAT=<ID=QUAL,Number=.,Type=String,Description="">
##FORMAT=<ID=FILTER,Number=.,Type=String,Description="">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT a b
1 1000 ID G A 999 FILTER CHROM=infoCHROM;POS=infoPOS;ID=infoID;REF=infoREF;ALT=infoALT;QUAL=infoQUAL;FILTER=infoFILTER CHROM:POS:ID:REF:ALT:QUAL:FILTER fmtCHROM1:fmtPOS1:fmtID1:fmtREF1:fmtALT1:fmtQUAL1:fmtFILTER1 fmtCHROM2:fmtPOS2:fmtID2:fmtREF2:fmtALT2:fmtQUAL2:fmtFILTER2
3 changes: 3 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.5.a','merge.gvcf.5.b'],out=>'merge.gvcf.5.1.out',args=>'--gvcf - --merge none');
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.11.a','merge.gvcf.11.b','merge.gvcf.11.c'],out=>'merge.gvcf.11.1.out',args=>'--gvcf -');
# run_test(\&test_vcf_merge_big,$opts,in=>'merge_big.1',out=>'merge_big.1.1',nsmpl=>79000,nfiles=>79,nalts=>486,args=>''); # commented out for speed
# query.3.vcf declares INFO and FORMAT tags named after the fixed VCF columns
# (CHROM, POS, ID, REF, ALT, QUAL, FILTER), so the three cases below check how
# an ambiguous tag name is resolved in a -f format string (#2337).
#
# 3.1: outside brackets, %TAG prints the fixed column and %INFO/TAG prints the INFO tag.
run_test(\&test_vcf_query,$opts,in=>'query.3',out=>'query.3.1.out',args=>q[-f '%CHROM %POS %ID %REF %ALT %QUAL %FILTER \\t %INFO/CHROM %INFO/POS %INFO/ID %INFO/REF %INFO/ALT %INFO/QUAL %INFO/FILTER']);
# 3.2: inside [...] a bare %TAG prints the per-sample FORMAT tag of that name.
run_test(\&test_vcf_query,$opts,in=>'query.3',out=>'query.3.2.out',args=>q[-f '[ %CHROM] \\t [ %POS] \\t [ %ID] \\t [ %REF] \\t [ %ALT] \\t [ %QUAL] \\t [ %FILTER]']);
# 3.3: inside [...] the %/TAG form prints the fixed column instead, repeated once per sample.
run_test(\&test_vcf_query,$opts,in=>'query.3',out=>'query.3.3.out',args=>q[-f '[ %/CHROM] \\t [ %/POS] \\t [ %/ID] \\t [ %/REF] \\t [ %/ALT] \\t [ %/QUAL] \\t [ %/FILTER]']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.14',out=>'query.filter.14.1.out',args=>q[-f '%CHROM:%POS [ %SAMPLE %GT]']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.14',out=>'query.filter.14.2.out',args=>q[-f '%CHROM:%POS [ %SAMPLE %GT]' -i'GT="."']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.14',out=>'query.filter.14.3.out',args=>q[-f '%CHROM:%POS [ %SAMPLE %GT]' -i'GT="0|1"']);
Expand Down

0 comments on commit 707336b

Please sign in to comment.