-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
160 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
readme: | | ||
DeepVariant+gvcf_norm | ||
input: | ||
header : |- | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##FILTER=<ID=RefCall,Description="Genotyping model thinks this site is reference."> | ||
##FILTER=<ID=LowQual,Description="Confidence in this variant being real is below calling threshold." | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth of all passing filters reads."> | ||
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block."> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth of all passing filters reads for each | ||
##FORMAT=<ID=VAF,Number=A,Type=Float,Description="Variant allele fractions."> | ||
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods, log10 encoded"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Genotype likelihoods, Phred encoded"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> | ||
##contig=<ID=chr21,length=48129895> | ||
##contig=<ID=chr22,length=51304566> | ||
##INFO=<ID=gvcf_norm_originalPOS,Number=1,Type=Integer,Description="POS before gvcf_norm left-aligned the variant"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT | ||
body: | ||
- NA12878.gvcf: | | ||
NA12878 | ||
chr21 26193733 . T <*> 0 . END=26193733 GT:GQ:MIN_DP:PL 0/0:39:33:0,39,869 | ||
chr21 26193734 . G T,<*> 29 PASS . GT:GQ:DP:AD:VAF:PL 0/1:29:33:21,12,0:0.363636,0:29,0,48,990,990,990 | ||
chr21 26193735 . T <*> 0 . END=26193740 GT:GQ:MIN_DP:PL 0/0:50:32:0,69,929 | ||
chr21 26193741 . TTTTTT T,<*> 29.1 PASS . GT:GQ:DP:AD:VAF:PL 0/1:29:32:22,8,0:0.25,0:29,0,55,990,990,990 | ||
chr21 26193747 . T <*> 0 . END=26193751 GT:GQ:MIN_DP:PL 0/0:50:22:0,66,659 | ||
chr21 29848774 . T <*> 0 . END=29848778 GT:GQ:MIN_DP:PL 0/0:50:30:0,99,989 | ||
chr21 29848779 . AT ATATATTT,T,<*> 40.3 PASS . GT:GQ:DP:AD:VAF:PL 1/2:16:32:1,11,12,0:0.34375,0.375,0:40,19,47,19,0,57,990,990,990,990 | ||
chr21 29848781 . T <*> 0 . END=29848791 GT:GQ:MIN_DP:PL 0/0:50:19:0,57,569 | ||
- NA12878_norm.gvcf: | | ||
NA12878_norm | ||
chr21 26193733 . T <*> 0 . END=26193733 GT:GQ:MIN_DP:PL 0/0:39:33:0,39,869 | ||
chr21 26193734 . G T,<*> 29 PASS . GT:GQ:DP:AD:VAF:PL 0/1:29:33:21,12,0:0.363636,0:29,0,48,990,990,990 | ||
chr21 26193734 . GTTTTT G,<*> 29.1 PASS gvcf_norm_originalPOS=26193741 GT:GQ:DP:AD:VAF:PL 0/1:29:32:22,8,0:0.25,0:29,0,55,990,990,990 | ||
chr21 26193735 . T <*> 0 . END=26193740 GT:GQ:MIN_DP:PL 0/0:50:32:0,69,929 | ||
chr21 26193747 . T <*> 0 . END=26193751 GT:GQ:MIN_DP:PL 0/0:50:22:0,66,659 | ||
chr21 29848774 . T <*> 0 . END=29848778 GT:GQ:MIN_DP:PL 0/0:50:30:0,99,989 | ||
chr21 29848778 . TA TATATATT,T,<*> 40.3 PASS gvcf_norm_originalPOS=29848779 GT:GQ:DP:AD:VAF:PL 1/2:16:32:1,11,12,0:0.34375,0.375,0:40,19,47,19,0,57,990,990,990,990 | ||
chr21 29848781 . T <*> 0 . END=29848791 GT:GQ:MIN_DP:PL 0/0:50:19:0,57,569 | ||
unifier_config: | ||
min_AQ1: 0 | ||
min_AQ2: 0 | ||
min_GQ: 0 | ||
monoallelic_sites_for_lost_alleles: true | ||
|
||
genotyper_config: | ||
required_dp: 0 | ||
revise_genotypes: true | ||
snv_prior_calibration: 0.375 | ||
indel_prior_calibration: 0.375 | ||
allow_partial_data: true | ||
more_PL: true | ||
trim_uncalled_alleles: true | ||
liftover_fields: | ||
- orig_names: [MIN_DP, DP] | ||
name: DP | ||
description: '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">' | ||
type: int | ||
combi_method: min | ||
number: basic | ||
count: 1 | ||
ignore_non_variants: true | ||
- orig_names: [AD] | ||
name: AD | ||
description: '##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">' | ||
type: int | ||
number: alleles | ||
combi_method: min | ||
default_type: zero | ||
count: 0 | ||
- orig_names: [GQ] | ||
name: GQ | ||
description: '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">' | ||
type: int | ||
number: basic | ||
combi_method: min | ||
count: 1 | ||
ignore_non_variants: true | ||
- orig_names: [PL] | ||
name: PL | ||
description: '##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype Likelihoods">' | ||
type: int | ||
number: genotype | ||
combi_method: missing | ||
count: 0 | ||
ignore_non_variants: true | ||
|
||
truth_unified_sites: | ||
- range: {ref: chr21, beg: 26193734, end: 26193739} | ||
in_target: {ref: chr21, beg: 1, end: 1000000000} | ||
alleles: | ||
- dna: GTTTTT | ||
- dna: TTTTTT | ||
normalized: | ||
range: {beg: 26193734, end: 26193734} | ||
dna: T | ||
quality: 29 | ||
frequency: 0.5 | ||
- dna: G | ||
quality: 29 | ||
frequency: 0.25 | ||
quality: 29 | ||
- range: {ref: chr21, beg: 26193741, end: 26193746} | ||
in_target: {ref: chr21, beg: 1, end: 1000000000} | ||
alleles: | ||
- dna: TTTTTT | ||
- dna: T | ||
quality: 29 | ||
frequency: 0.25 | ||
quality: 29 | ||
- range: {ref: chr21, beg: 29848778, end: 29848779} | ||
in_target: {ref: chr21, beg: 1, end: 1000000000} | ||
alleles: | ||
- dna: TA | ||
- dna: T | ||
quality: 19 | ||
frequency: 0.5 | ||
- dna: TATATATT | ||
normalized: | ||
range: {beg: 29848779, end: 29848779} | ||
dna: ATATATT | ||
quality: 19 | ||
frequency: 0.5 | ||
quality: 19 | ||
unification: | ||
- range: {beg: 29848779, end: 29848780} | ||
dna: T | ||
to: 1 | ||
- range: {beg: 29848779, end: 29848780} | ||
dna: ATATATTT | ||
to: 2 | ||
|
||
truth_output_vcf: | ||
- truth.vcf: | | ||
##fileformat=VCFv4.2 | ||
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency estimate for each alternate allele"> | ||
##INFO=<ID=AQ,Number=A,Type=Integer,Description="Allele Quality score reflecting evidence from all samples (Phred scale)"> | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Genotype likelihoods, Phred encoded"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=RNC,Number=G,Type=Character,Description="Reason for No Call in GT: . = n/a, M = Missing data, P = Partial data, D = insufficient Depth of coverage, - = unrepresentable overlapping deletion, L = Lost/unrepresentable allele (other than deletion), U = multiple Unphased variants present, O = multiple Overlapping variants present"> | ||
##contig=<ID=21,length=48129895> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12878_norm | ||
chr21 26193734 . GTTTTT TTTTTT,G 29 . AF=0.5,0.25;AQ=29,29 GT:DP:AD:GQ:PL:RNC 0/1:33:21,12,0:29:29,0,48,990,990,990:.. 1/2:32:.,12,8:29:0,0,0,0,0,0:.. | ||
chr21 26193741 . TTTTTT T 29 . AF=0.25;AQ=29 GT:DP:AD:GQ:PL:RNC 0/1:32:22,8:29:29,0,55:.. ./.:.:.:.:0,0,0:MM | ||
chr21 29848778 . TA T,TATATATT 19 . AF=0.5,0.5;AQ=19,19 GT:DP:AD:GQ:PL:RNC 1/2:32:1,12,11:16:40,19,57,19,0,47:.. 1/2:32:1,12,11:16:40,19,57,19,0,47:.. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters