From 68e25e5dd0366b58131c122671362eb662bd6f15 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Wed, 11 Aug 2021 00:02:35 -1000 Subject: [PATCH] test gvcf_norm examples --- .../data/gvcf_test_cases/deepvariant_norm.yml | 154 ++++++++++++++++++ test/gvcf_test_cases.cc | 6 + 2 files changed, 160 insertions(+) create mode 100644 test/data/gvcf_test_cases/deepvariant_norm.yml diff --git a/test/data/gvcf_test_cases/deepvariant_norm.yml b/test/data/gvcf_test_cases/deepvariant_norm.yml new file mode 100644 index 00000000..cf5194d6 --- /dev/null +++ b/test/data/gvcf_test_cases/deepvariant_norm.yml @@ -0,0 +1,154 @@ +readme: | + DeepVariant+gvcf_norm + +input: + header : |- + ##fileformat=VCFv4.2 + ##FILTER= + ##FILTER= + ##FILTER= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##INFO= + ##contig= + ##contig= + ##INFO= + #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT + body: + - NA12878.gvcf: | + NA12878 + chr21 26193733 . T <*> 0 . END=26193733 GT:GQ:MIN_DP:PL 0/0:39:33:0,39,869 + chr21 26193734 . G T,<*> 29 PASS . GT:GQ:DP:AD:VAF:PL 0/1:29:33:21,12,0:0.363636,0:29,0,48,990,990,990 + chr21 26193735 . T <*> 0 . END=26193740 GT:GQ:MIN_DP:PL 0/0:50:32:0,69,929 + chr21 26193741 . TTTTTT T,<*> 29.1 PASS . GT:GQ:DP:AD:VAF:PL 0/1:29:32:22,8,0:0.25,0:29,0,55,990,990,990 + chr21 26193747 . T <*> 0 . END=26193751 GT:GQ:MIN_DP:PL 0/0:50:22:0,66,659 + chr21 29848774 . T <*> 0 . END=29848778 GT:GQ:MIN_DP:PL 0/0:50:30:0,99,989 + chr21 29848779 . AT ATATATTT,T,<*> 40.3 PASS . GT:GQ:DP:AD:VAF:PL 1/2:16:32:1,11,12,0:0.34375,0.375,0:40,19,47,19,0,57,990,990,990,990 + chr21 29848781 . T <*> 0 . END=29848791 GT:GQ:MIN_DP:PL 0/0:50:19:0,57,569 + - NA12878_norm.gvcf: | + NA12878_norm + chr21 26193733 . T <*> 0 . END=26193733 GT:GQ:MIN_DP:PL 0/0:39:33:0,39,869 + chr21 26193734 . G T,<*> 29 PASS . GT:GQ:DP:AD:VAF:PL 0/1:29:33:21,12,0:0.363636,0:29,0,48,990,990,990 + chr21 26193734 . 
GTTTTT G,<*> 29.1 PASS gvcf_norm_originalPOS=26193741 GT:GQ:DP:AD:VAF:PL 0/1:29:32:22,8,0:0.25,0:29,0,55,990,990,990 + chr21 26193735 . T <*> 0 . END=26193740 GT:GQ:MIN_DP:PL 0/0:50:32:0,69,929 + chr21 26193747 . T <*> 0 . END=26193751 GT:GQ:MIN_DP:PL 0/0:50:22:0,66,659 + chr21 29848774 . T <*> 0 . END=29848778 GT:GQ:MIN_DP:PL 0/0:50:30:0,99,989 + chr21 29848778 . TA TATATATT,T,<*> 40.3 PASS gvcf_norm_originalPOS=29848779 GT:GQ:DP:AD:VAF:PL 1/2:16:32:1,11,12,0:0.34375,0.375,0:40,19,47,19,0,57,990,990,990,990 + chr21 29848781 . T <*> 0 . END=29848791 GT:GQ:MIN_DP:PL 0/0:50:19:0,57,569 + +unifier_config: + min_AQ1: 0 + min_AQ2: 0 + min_GQ: 0 + monoallelic_sites_for_lost_alleles: true + +genotyper_config: + required_dp: 0 + revise_genotypes: true + snv_prior_calibration: 0.375 + indel_prior_calibration: 0.375 + allow_partial_data: true + more_PL: true + trim_uncalled_alleles: true + liftover_fields: + - orig_names: [MIN_DP, DP] + name: DP + description: '##FORMAT=' + type: int + combi_method: min + number: basic + count: 1 + ignore_non_variants: true + - orig_names: [AD] + name: AD + description: '##FORMAT=' + type: int + number: alleles + combi_method: min + default_type: zero + count: 0 + - orig_names: [GQ] + name: GQ + description: '##FORMAT=' + type: int + number: basic + combi_method: min + count: 1 + ignore_non_variants: true + - orig_names: [PL] + name: PL + description: '##FORMAT=' + type: int + number: genotype + combi_method: missing + count: 0 + ignore_non_variants: true + +truth_unified_sites: +- range: {ref: chr21, beg: 26193734, end: 26193739} + in_target: {ref: chr21, beg: 1, end: 1000000000} + alleles: + - dna: GTTTTT + - dna: TTTTTT + normalized: + range: {beg: 26193734, end: 26193734} + dna: T + quality: 29 + frequency: 0.5 + - dna: G + quality: 29 + frequency: 0.25 + quality: 29 +- range: {ref: chr21, beg: 26193741, end: 26193746} + in_target: {ref: chr21, beg: 1, end: 1000000000} + alleles: + - dna: TTTTTT + - dna: T + quality: 29 + frequency: 0.25 
+ quality: 29 +- range: {ref: chr21, beg: 29848778, end: 29848779} + in_target: {ref: chr21, beg: 1, end: 1000000000} + alleles: + - dna: TA + - dna: T + quality: 19 + frequency: 0.5 + - dna: TATATATT + normalized: + range: {beg: 29848779, end: 29848779} + dna: ATATATT + quality: 19 + frequency: 0.5 + quality: 19 + unification: + - range: {beg: 29848779, end: 29848780} + dna: T + to: 1 + - range: {beg: 29848779, end: 29848780} + dna: ATATATTT + to: 2 + +truth_output_vcf: + - truth.vcf: | + ##fileformat=VCFv4.2 + ##INFO= + ##INFO= + ##FILTER= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##FORMAT= + ##contig= + #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12878_norm + chr21 26193734 . GTTTTT TTTTTT,G 29 . AF=0.5,0.25;AQ=29,29 GT:DP:AD:GQ:PL:RNC 0/1:33:21,12,0:29:29,0,48,990,990,990:.. 1/2:32:.,12,8:29:0,0,0,0,0,0:.. + chr21 26193741 . TTTTTT T 29 . AF=0.25;AQ=29 GT:DP:AD:GQ:PL:RNC 0/1:32:22,8:29:29,0,55:.. ./.:.:.:.:0,0,0:MM + chr21 29848778 . TA T,TATATATT 19 . AF=0.5,0.5;AQ=19,19 GT:DP:AD:GQ:PL:RNC 1/2:32:1,12,11:16:40,19,57,19,0,47:.. 1/2:32:1,12,11:16:40,19,57,19,0,47:.. diff --git a/test/gvcf_test_cases.cc b/test/gvcf_test_cases.cc index 5dc86997..3e5e3ce6 100644 --- a/test/gvcf_test_cases.cc +++ b/test/gvcf_test_cases.cc @@ -852,6 +852,12 @@ TEST_CASE("DeepVariant2") { GVCFTestCase("deepvariant2", v_formats, v_infos, false).perform_gvcf_test(); } +TEST_CASE("DeepVariant_norm") { + vector<string> v_formats = {"DP", "GT", "GQ", "PL", "AD", "RNC"}; + vector<string> v_infos = {"ANR","AF","AQ"}; + GVCFTestCase("deepvariant_norm", v_formats, v_infos, false).perform_gvcf_test(); +} + TEST_CASE("dv_1000G_chr21_5583275") { vector<string> v_formats = {"DP", "GT", "GQ", "PL", "AD", "RNC"}; vector<string> v_infos = {"ANR","AF","AQ"};