From 80b22852f15a59c20b9fa417a2c7fb11ed6245bf Mon Sep 17 00:00:00 2001 From: Zilong-Li Date: Tue, 26 Mar 2024 21:15:42 +0100 Subject: [PATCH] v0.4.3 --- DESCRIPTION | 2 +- README.Rmd | 35 ++++++++++++++++++++++++++--------- README.md | 42 ++++++++++++++++++++++++++++++------------ cran-comments.md | 5 ++++- 4 files changed, 61 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 53e0e61..6e7b097 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: vcfppR Title: Rapid Manipulation of the Variant Call Format (VCF) -Version: 0.4.2 +Version: 0.4.3 Authors@R: c( person("Zilong", "Li", , "zilong.dk@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5859-2078")), diff --git a/README.Rmd b/README.Rmd index c1b2443..222fec2 100644 --- a/README.Rmd +++ b/README.Rmd @@ -32,11 +32,20 @@ The vcfppR package implements various useful functions for rapidly manipulating remotes::install_github("Zilong-Li/vcfppR") ## from latest github ``` -## vcftable: read VCF as tabular data +## Cite the work + +If you find it useful, please cite the [paper](https://doi.org/10.1093/bioinformatics/btae049) + +``` r +library(vcfppR) +citation("vcfppR") +``` + +## `vcftable`: read VCF as tabular data `vcftable` gives you fine control over what you want to extract from VCF/BCF files. 
-Read only SNP variants: +**Read only SNP variants** ```r library(vcfppR) @@ -45,7 +54,7 @@ res <- vcftable(vcffile, "chr21:1-5100000", vartype = "snps") str(res) ``` -Read only SNP variants with PL format and drop the INFO column in the VCF/BCF: +**Read only SNP variants with PL format and drop the INFO column in the VCF/BCF** ```r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_raw_GT_with_annot/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.recalibrated_variants.vcf.gz" @@ -53,14 +62,14 @@ res <- vcftable(vcffile, "chr21:1-5100000", vartype = "snps", format = "PL", inf str(res) ``` -Read only indels variants with DP format in the VCF/BCF: +**Read only INDEL variants with DP format in the VCF/BCF** ```r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_raw_GT_with_annot/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.recalibrated_variants.vcf.gz" res <- vcftable(vcffile, "chr21:1-5100000", vartype = "indels", format = "DP") str(res) ``` -## vcfcomp: compare two VCF files and report concordance statistics +## `vcfcomp`: compare two VCF files and report concordance Want to investigate the concordance between two VCF files? `vcfcomp` is the utility function you need! @@ -68,19 +77,27 @@ Want to investigate the concordance between two VCF files? 
`vcfcomp` is the util ```r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20220422_3202_phased_SNV_INDEL_SV/1kGP_high_coverage_Illumina.chr21.filtered.SNV_INDEL_SV_phased_panel.vcf.gz" -res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "r2", format = c('GT','GT')) -as.data.frame(res) +res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "r2", formats = c('GT','GT')) +str(res) ``` **Genotype F1 score** ```r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20220422_3202_phased_SNV_INDEL_SV/1kGP_high_coverage_Illumina.chr21.filtered.SNV_INDEL_SV_phased_panel.vcf.gz" -res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "f1", format = c('GT','GT')) +res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "f1") +str(res) +``` + +**Genotype Non-Reference Concordance** + +```r +vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20220422_3202_phased_SNV_INDEL_SV/1kGP_high_coverage_Illumina.chr21.filtered.SNV_INDEL_SV_phased_panel.vcf.gz" +res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "nrc") str(res) ``` -## vcfsummary: variants characterization +## `vcfsummary`: variants characterization Want to summarize variants discovered by genotype caller e.g. GATK? `vcfsummary` is the utility function you need! 
diff --git a/README.md b/README.md index cd58fd3..1370b2f 100644 --- a/README.md +++ b/README.md @@ -24,12 +24,22 @@ manipulating VCF/BCF files in R using the C++ API of remotes::install_github("Zilong-Li/vcfppR") ## from latest github ``` -## vcftable: read VCF as tabular data +## Cite the work + +If you find it useful, please cite the +[paper](https://doi.org/10.1093/bioinformatics/btae049) + +``` r +library(vcfppR) +citation("vcfppR") +``` + +## `vcftable`: read VCF as tabular data `vcftable` gives you fine control over what you want to extract from VCF/BCF files. -Read only SNP variants: +**Read only SNP variants** ``` r library(vcfppR) @@ -38,8 +48,8 @@ res <- vcftable(vcffile, "chr21:1-5100000", vartype = "snps") str(res) ``` -Read only SNP variants with PL format and drop the INFO column in the -VCF/BCF: +**Read only SNP variants with PL format and drop the INFO column in the +VCF/BCF** ``` r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_raw_GT_with_annot/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.recalibrated_variants.vcf.gz" @@ -47,7 +57,7 @@ res <- vcftable(vcffile, "chr21:1-5100000", vartype = "snps", format = "PL", inf str(res) ``` -Read only indels variants with DP format in the VCF/BCF: +**Read only INDEL variants with DP format in the VCF/BCF** ``` r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_raw_GT_with_annot/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.recalibrated_variants.vcf.gz" @@ -55,31 +65,39 @@ res <- vcftable(vcffile, "chr21:1-5100000", vartype = "indels", format = "DP") str(res) ``` -## vcfcomp: compare two VCF files and report concordance statistics +## `vcfcomp`: compare two VCF files and report concordance Want to investigate the concordance between two VCF files? `vcfcomp` is -the utility function you need\! +the utility function you need! 
**Genotype correlation** ``` r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20220422_3202_phased_SNV_INDEL_SV/1kGP_high_coverage_Illumina.chr21.filtered.SNV_INDEL_SV_phased_panel.vcf.gz" -res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "r2", format = c('GT','GT')) -as.data.frame(res) +res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "r2", formats = c('GT','GT')) +str(res) ``` **Genotype F1 score** ``` r vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20220422_3202_phased_SNV_INDEL_SV/1kGP_high_coverage_Illumina.chr21.filtered.SNV_INDEL_SV_phased_panel.vcf.gz" -res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "f1", format = c('GT','GT')) +res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "f1") +str(res) +``` + +**Genotype Non-Reference Concordance** + +``` r +vcffile <- "https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20220422_3202_phased_SNV_INDEL_SV/1kGP_high_coverage_Illumina.chr21.filtered.SNV_INDEL_SV_phased_panel.vcf.gz" +res <- vcfcomp(test = vcffile, truth = vcffile, region = "chr21:1-5100000", stats = "nrc") str(res) ``` -## vcfsummary: variants characterization +## `vcfsummary`: variants characterization Want to summarize variants discovered by genotype caller e.g. GATK? -`vcfsummary` is the utility function you need\! +`vcfsummary` is the utility function you need! **Small variants** diff --git a/cran-comments.md b/cran-comments.md index 5896048..11caf30 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1 +1,4 @@ -address clang-UBSAN issue by including the latest htslib-1.19.1 (https://github.com/samtools/htslib/releases/tag/1.19) +1. 
address the clang-UBSAN issue by including the latest htslib-1.19.1 (https://github.com/samtools/htslib/releases/tag/1.19) +2. reduce the size of the package +3. add the copyright holders and authors of htslib +4. add the new function `vcfcomp`