Skip to content

Commit

Permalink
copy over some additional test data files
Browse files Browse the repository at this point in the history
  • Loading branch information
aryarm committed Aug 27, 2024
1 parent d57852b commit ca2383b
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 0 deletions.
30 changes: 30 additions & 0 deletions tests/data/snps.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##fileDate=15082017_02h10m46s
##source=SHAPEIT2.v837
##log_file=shapeit_15082017_02h10m46s_d9e4b88d-2500-460c-9753-e6c2ac3bcb15.log
##FORMAT=<ID=GT,Number=1,Type=String,Description="Phased Genotype">
##contig=<ID=22>
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##bcftools_viewVersion=1.6-49-ga2ffda9-dirty+htslib-1.7-1-gaf40c73
##bcftools_viewCommand=view --samples SS0012978,SS0012979,SS0013012 -r 22:16050115-16052271 -Oz -o snps.vcf.gz /storage/resources/datasets/SSC_SNP_v3/shapeit.chr22.with.ref.v3.vcf.gz; Date=Wed Jan 29 11:43:55 2020
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SS0012978 SS0012979 SS0013012
22 16050115 22:16050115:G:A G A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16050213 22:16050213:C:T C T . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16050527 22:16050527:C:A C A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16050607 22:16050607:G:A G A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16050783 22:16050783:A:G A G . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16050840 22:16050840:C:G C G . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16050847 22:16050847:T:C T C . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16050984 rs188945759 C G . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16051075 22:16051075:G:A G A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16051246 22:16051246:G:A G A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16051249 rs62224609 T C . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16051477 rs192339082 C A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16051493 22:16051493:G:A G A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16051564 22:16051564:T:C T C . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16051816 22:16051816:T:G T G . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16052032 22:16052032:G:A G A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
22 16052080 rs4965031 G A . PASS AC=1;AN=6 GT 0|0 0|0 1|0
22 16052271 rs188996808 G A . PASS AC=0;AN=6 GT 0|0 0|0 0|0
21 changes: 21 additions & 0 deletions tests/data/test_ExpansionHunter.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
##fileformat=VCFv4.1
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant">
##INFO=<ID=REF,Number=1,Type=Integer,Description="Reference copy number">
##INFO=<ID=REPID,Number=1,Type=String,Description="Repeat identifier as specified in the variant catalog">
##INFO=<ID=RL,Number=1,Type=Integer,Description="Reference length in bp">
##INFO=<ID=RU,Number=1,Type=String,Description="Repeat unit in the reference orientation">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=VARID,Number=1,Type=String,Description="Variant identifier as specified in the variant catalog">
##FILTER=<ID=PASS,Description="All filters passed">
##FORMAT=<ID=ADFL,Number=1,Type=String,Description="Number of flanking reads consistent with the allele">
##FORMAT=<ID=ADIR,Number=1,Type=String,Description="Number of in-repeat reads consistent with the allele">
##FORMAT=<ID=ADSP,Number=1,Type=String,Description="Number of spanning reads consistent with the allele">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=LC,Number=1,Type=Float,Description="Locus coverage">
##FORMAT=<ID=REPCI,Number=1,Type=String,Description="Confidence interval for REPCN">
##FORMAT=<ID=REPCN,Number=1,Type=String,Description="Number of repeat units spanned by the allele">
##FORMAT=<ID=SO,Number=1,Type=String,Description="Type of reads that support the allele; can be SPANNING, FLANKING, or INREPEAT meaning that the reads span, flank, or are fully contained in the repeat">
##ALT=<ID=STR16,Description="Allele comprised of 16 repeat units">
##ALT=<ID=STR18,Description="Allele comprised of 18 repeat units">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878_S1_HTT
chr4 3076603 . C <STR16>,<STR18> . PASS END=3076660;REF=19;RL=57;RU=CAG;VARID=HTT;REPID=HTT GT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC 1/2:SPANNING/SPANNING:16/18:16-16/18-18:4/7:319/320:0/0:34.624013
13 changes: 13 additions & 0 deletions tests/data/test_advntr.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
##fileformat=VCFv4.3
##source=adVNTR ver. 1.3.3
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of variant">
##INFO=<ID=VID,Number=1,Type=Integer,Description="VNTR ID">
##INFO=<ID=RU,Number=1,Type=String,Description="Repeat motif">
##INFO=<ID=RC,Number=1,Type=Integer,Description="Reference repeat unit count">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Spanning read count">
##FORMAT=<ID=FR,Number=1,Type=Integer,Description="Flanking read count">
##FORMAT=<ID=ML,Number=1,Type=Float,Description="Maximum likelihood">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CSTB_2_5_testdata.bam
chr21 45196323 . GCGCGGGGCGGGGCGCGGGGCGGGGCGCGGGGCGGG GCGCGGGGCGGGGCGCGGGGCGGG,GCGCGGGGCGGGGCGCGGGGCGGGGCGCGGGGCGGGGCGCGGGGCGGGGCGCGGGGCGGG . . END=45196359;VID=301645;RU=GCGCGGGGCGGG;RC=3 GT:DP:SR:FR:ML 1/2:60:39:21:0.863
46 changes: 46 additions & 0 deletions tests/data/test_gangstr_noqual.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##command=GangSTR-2.4.2.12-9da7 --bam /scratch/bams/drive3/4512-JFI-0366_BXD196_phased_possorted_bam.bam --regions /scratch/regions/0705_mm10.bed --ref /scratch/ref/mm10.fa --str-info /scratch/strinfo/100519_mm10_strinfo_wo_stutt_recalc_many_samples.bed --chrom chr1 --trim-to-readlength 128 --drop-dupes --out result/4512-JFI-0366_BXD196/res_chr1
##contig=<ID=chr10,length=130694993>
##contig=<ID=chr11,length=122082543>
##contig=<ID=chr12,length=120129022>
##contig=<ID=chr13,length=120421639>
##contig=<ID=chr14,length=124902244>
##contig=<ID=chr15,length=104043685>
##contig=<ID=chr16,length=98207768>
##contig=<ID=chr17,length=94987271>
##contig=<ID=chr18,length=90702639>
##contig=<ID=chr19,length=61431566>
##contig=<ID=chr1,length=195471971>
##contig=<ID=chr2,length=182113224>
##contig=<ID=chr3,length=160039680>
##contig=<ID=chr4,length=156508116>
##contig=<ID=chr5,length=151834684>
##contig=<ID=chr6,length=149736546>
##contig=<ID=chr7,length=145441459>
##contig=<ID=chr8,length=129401213>
##contig=<ID=chr9,length=124595110>
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of variant">
##INFO=<ID=RU,Number=1,Type=String,Description="Repeat motif">
##INFO=<ID=PERIOD,Number=1,Type=Integer,Description="Repeat period (length of motif)">
##INFO=<ID=REF,Number=1,Type=Float,Description="Reference copy number">
##INFO=<ID=GRID,Number=2,Type=Integer,Description="Range of optimization grid">
##INFO=<ID=EXPTHRESH,Number=1,Type=Integer,Description="Threshold for calling expansions">
##INFO=<ID=STUTTERUP,Number=1,Type=Float,Description="Stutter model - up prob">
##INFO=<ID=STUTTERDOWN,Number=1,Type=Float,Description="Stutter model - down prob">
##INFO=<ID=STUTTERP,Number=1,Type=Float,Description="Stutter model - p">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=REPCN,Number=2,Type=Integer,Description="Genotype given in number of copies of the repeat motif">
##FORMAT=<ID=REPCI,Number=1,Type=String,Description="Confidence intervals">
##FORMAT=<ID=RC,Number=1,Type=String,Description="Number of reads in each class (enclosing, spanning, FRR, bounding)">
##FORMAT=<ID=ENCLREADS,Number=1,Type=String,Description="Summary of reads in enclosing class. Keys are number of copies and values show number of reads with that many copies.">
##FORMAT=<ID=FLNKREADS,Number=1,Type=String,Description="Summary of reads in flanking class. Keys are number of copies and values show number of reads with that many copies.">
##FORMAT=<ID=ML,Number=1,Type=Float,Description="Maximum likelihood">
##FORMAT=<ID=INS,Number=2,Type=Float,Description="Insert size mean and stddev">
##FORMAT=<ID=STDERR,Number=2,Type=Float,Description="Bootstrap standard error of each allele">
##FORMAT=<ID=QEXP,Number=3,Type=Float,Description="Prob. of no expansion, 1 expanded allele, both expanded alleles">
##bcftools_filterVersion=1.6-49-ga2ffda9-dirty+htslib-1.7-1-gaf40c73
##bcftools_filterCommand=filter -r chr1:3000000-10000000 /storage/mikhail/100919_BXD_all_drive1_drive3_rep_stutt_model/result/4512-JFI-0366_BXD196/res_chr1.vcf.gz; Date=Sun Nov 24 18:06:49 2019
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT BXD196_0366
chr1 3004986 . tctgtctgtctg . . PASS END=3004997;RU=tctg;PERIOD=4;REF=3;GRID=1,6;STUTTERUP=0.000632591;STUTTERDOWN=0.000632591;STUTTERP=0.953462;EXPTHRESH=3 GT:DP:REPCN:REPCI:RC:ENCLREADS:FLNKREADS:ML:INS:STDERR:QEXP 0/0:31:3,3:3-3,3-3:17,12,0,2:3,17:2,2:193.56:366.612,131.36:0,0:0,5.1188e-05,0.999949
Loading

0 comments on commit ca2383b

Please sign in to comment.