pv added

RIVM-IIV-Microbiome · Nov 28, 2021 · 8519b39 · 8519b39
1 parent b500880
commit 8519b39
Show file tree

Hide file tree

Showing 7 changed files with 230 additions and 13 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: biomeStats
 Type: Package
 Title: Methods for carrying out association studies of the microbiome
-Version: 00.00.04
+Version: 00.00.05
 Authors@R: 
     c(person(given = "Sudarshan", family = "Shetty", role = c("aut", "cre"),
              email = "[email protected]",

diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ export(illustrate.ONE.association.BY.STRATUM)
 export(prepSampleData)
 export(prepTaxaAbundanceData)
 export(show.missing.by.variable)
+export(taxaPV)
 export(test.ONE.association)
 export(test.ONE.association.0)
 import(graphics)
@@ -21,6 +22,7 @@ importFrom(coin,kruskal_test)
 importFrom(coin,pvalue)
 importFrom(coin,spearman_test)
 importFrom(coin,statistic)
+importFrom(dplyr,"%>%")
 importFrom(dplyr,as_tibble)
 importFrom(dplyr,distinct)
 importFrom(dplyr,group_by)
@@ -35,7 +37,10 @@ importFrom(magrittr,"%>%")
 importFrom(microbiome,abundances)
 importFrom(microbiome,core_members)
 importFrom(microbiome,meta)
+importFrom(phyloseq,nsamples)
+importFrom(phyloseq,prune_samples)
 importFrom(phyloseq,sample_data)
+importFrom(phyloseq,sample_names)
 importFrom(readr,write_tsv)
 importFrom(reshape2,melt)
 importFrom(rstatix,add_significance)
@@ -48,7 +53,9 @@ importFrom(stats,as.formula)
 importFrom(stats,cor)
 importFrom(stats,na.omit)
 importFrom(stats,qnorm)
+importFrom(stats,quantile)
 importFrom(stats,sd)
 importFrom(tibble,column_to_rownames)
 importFrom(tibble,rownames_to_column)
 importFrom(tidyr,pivot_wider)
+importFrom(utils,combn)
diff --git a/R/doAssociationAnalysis.R b/R/doAssociationAnalysis.R
@@ -77,12 +77,7 @@ doAssociationAnalysis <- function(data_for_testing,
 
   for(f in (1:no_of_factors)){
 
-    #if(verbose==TRUE){
-    # message(paste0("Processing.. ", test.i[1], " vs ", test.i[2]))
-    #}
-
     start.time <- Sys.time()
-
     columns.f <- cbind(f,aux_columns)
     #head(columns.f)
     tests <- NULL
@@ -95,13 +90,11 @@ doAssociationAnalysis <- function(data_for_testing,
                                        data_for_testing,
                                        aux_TYPES,variables.of.interest)
 
-      #if(verbose==TRUE){
-      # message(paste0("Processing.. ", test.i[1], " vs ", test.i[2]))
-      #}
-
       p.value.i <- as.numeric(test.i[3])
       #p.value.i
-      if(is.na(p.value.i)){p.value.i <- 2; B.i <- 2*B}
+      if(is.na(p.value.i)){
+        p.value.i <- 2; B.i <- 2*B
+        }
       if(p.value.i<1 & p.value.i>0){
         B.i <- min(max(B,ceiling(400/((1-p.value.i)*p.value.i))),1000000*10*10*10)
       }else{
@@ -119,6 +112,7 @@ doAssociationAnalysis <- function(data_for_testing,
         CI.p.value.i <- paste("[",CI.p.value.i[1],",",CI.p.value.i[2],"]",collapse="")
       }else{
         #print(c(i,B.i))
+
         tests.i <- test.ONE.association(as.vector(columns.f[i,]),
                                         data_for_testing,aux_TYPES,
                                         variables.of.interest,

diff --git a/R/taxaPV.R b/R/taxaPV.R
@@ -0,0 +1,144 @@
+#' Calculate Proportional Variability For Taxa
+#'
+#' @name taxaPV
+#'
+#' @details Proportional Variability of taxa using relative abundance.
+#'          The Proportional Variability is a non-parametric measure of variability.
+#'          \url{https://doi.org/10.1371/journal.pone.0084074}. Here, I have
+#'          incorporated subsampling approach to quantify Proportional Variability
+#'          in microbiome studies.
+#'
+#' @param x A phyloseq object
+#'
+#' @param subsample Logical. Default=TRUE.
+#'
+#' @param lower_conf Lower confidence. Default=0.025
+#' @param upper_conf Lower confidence. Default=0.975
+#' @param sampling_n Number of subsampling. Default=9
+#' @param round_pv_vals Return PV values by rounding to certain value. Default=3
+#' @examples
+#' library(biomeUtils)
+#' library(dplyr)
+#' library(microbiome)
+#' data("FuentesIliGutData")
+#' # Check for control samples how variable are core taxa
+#' ps1 <- filterSampleData(FuentesIliGutData, ILI == "C") %>%
+#'      microbiome::transform("compositional") %>%
+#'      core(0.01, 75/100)
+#' taxa_fd <- taxaPV(ps1)
+#' taxa_fd
+#'
+#' @return A tibble with taxa ids, taxonomic information,
+#'         two-group prevalence and fold change values.
+#'
+#' @author Original Author: Leo Lahti. Adapted by Sudarshan A. Shetty
+#'
+#' @references
+#' Heath JP, Borowski P. (2013). Quantifying Proportional Variability.
+#' \emph{PLoS ONE} 8(12): e84074.
+#' \url{https://doi.org/10.1371/journal.pone.0084074}
+#'
+#' Heath J. (2006) Quantifying temporal variability in population abundances.
+#' \emph{Oikos} 115, no. 3 (2006): 573-581.
+#' \url{https://doi.org/10.1111/j.2006.0030-1299.15067.x}
+#'
+#' Shetty SA (2021). Utilities for microbiome analytics.
+#' \url{https://github.com/RIVM-IIV-Microbiome/biomeUtils}
+#'
+#' @importFrom tibble rownames_to_column
+#' @importFrom dplyr %>%
+#'
+#' @export
+
+taxaPV <- function(x,
+                   subsample = TRUE,
+                   lower_conf=0.025,
+                   upper_conf=0.975,
+                   sampling_n =9,
+                   round_pv_vals = 3){
+
+  if (class(x) != "phyloseq") {
+    stop("Input is not an object of phyloseq class")
+  }
+
+  if(subsample){
+    all.pv <- .taxa_pv(x)
+    sampled.pv <- .taxa_pv_subsampled(x,
+                                      lower_conf = lower_conf,
+                                      upper_conf = upper_conf,
+                                      sampling_n = sampling_n)
+    comb.pv <- cbind(all.pv, sampled.pv)
+    comb.pv <- round(comb.pv, digits = round_pv_vals)
+    comb.pv <- comb.pv %>%
+      tibble::rownames_to_column("FeatureID")
+    return(comb.pv)
+  }
+
+  all.pv <- .taxa_pv(x)
+  all.pv <- round(all.pv, digits = round_pv_vals)
+  all.pv <- all.pv %>%
+    tibble::rownames_to_column("FeatureID")
+  return(all.pv)
+}
+
+
+#' @importFrom phyloseq sample_names prune_samples nsamples
+#' @importFrom stats quantile
+.taxa_pv_subsampled <- function(x,
+                                lower_conf = lower_conf,
+                                upper_conf = upper_conf,
+                                sampling_n = sampling_n){
+  rand_sams <- ps.sub <- txvp <- sx <- cis_df <- sxi <- NULL
+  s <- NULL
+  for (i in seq_len(sampling_n)) {
+    size_80 <- round(0.8*nsamples(x))
+    rand_sams <- sample(sample_names(x), size= size_80, replace = TRUE)
+    ps.sub <- prune_samples(sample_names(x) %in% rand_sams, x)
+    #ps.sub <- prune_taxa(taxa_sums(ps.sub) > 0, ps.sub)
+    txvp <- .taxa_pv(ps.sub)
+    #rownames(txvp) <- txvp$Taxa
+    s[[i]] <- txvp
+  }
+  sx <- suppressMessages(dplyr::bind_cols(s))
+  tx.pv.lci <- apply(sx, 1, function(x){
+    quantile(x,lower_conf,na.rm=TRUE)
+  })
+  tx.pv.uci <- apply(sx, 1, function(x){
+    quantile(x,upper_conf,na.rm=TRUE)
+  })
+
+  cis <- data.frame(LowerCI = tx.pv.lci,
+                    UpperCI = tx.pv.uci)
+  return(cis)
+}
+
+#' @importFrom microbiome abundances
+## Overall PV calculations
+.taxa_pv <- function(x) {
+
+  sp_tab_tax <- NULL
+  # proportional variability index (PV)
+  sp_tab_tax <- t(abundances(x))
+  pv.d <- apply(sp_tab_tax, 2, .cal.pv)
+  pv.d <- data.frame(pv = pv.d)
+  return(pv.d)
+}
+
+# Modified PV Formula to account for sparsity
+.cal.pv <- function(x){
+  k = (1/100)*mean(x) # get low pseudocount
+  pv_val_tax <- .pv.index(x + k)
+  return(pv_val_tax)
+}
+
+# Main PV Formula
+#' @importFrom utils combn
+.pv.index <- function (Z){
+  n = length(Z)
+  pairs = combn(Z,2)
+  min_z = apply(pairs,2, min)
+  max_z = apply(pairs,2, max)
+  z = 1- (min_z/max_z)
+  PV=2*sum(z)/(n*(n-1))
+  return(PV)
+}
diff --git a/R/test.ONE.association.R b/R/test.ONE.association.R
@@ -119,7 +119,8 @@ test.ONE.association <- function(columns, data_for_testing, TYPES, variables.of.
     if (is.element(type.1, c("binary", "categorical", "ordinal")) &
       is.element(type.2, c("binary", "categorical", "ordinal")) & (tested == FALSE)) {
       CHM.test <- coin::cmh_test(as.factor(response) ~ as.factor(treatment) | stratum,
-        data = response.by.treatment, distribution = approximate(nresample = B.i)
+                                 data = response.by.treatment,
+                                 distribution = approximate(nresample = B.i)
       )
       p.value <- as.numeric(pvalue(CHM.test))
       if ((type.1 == "binary" & type.2 != "categorical") | (type.2 == "binary" & type.1 != "categorical")) {

diff --git a/man/taxaPV.Rd b/man/taxaPV.Rd
diff --git a/vignettes/biomestats.Rmd b/vignettes/biomestats.Rmd
@@ -34,7 +34,7 @@ data("FuentesIliGutData")
 ps <- subset_samples(FuentesIliGutData, ILI %in% c("C", "L1"))
 ps.gen <- tax_glom(ps, "Genus")
 #saveRDS(ps.gen, "data-raw/ps.gen.rds")
-#ps.gen <- readRDS("../data-raw/ps.gen.rds")
+ps.gen <- readRDS("../data-raw/ps.gen.rds")
 ps.rel <- microbiome::transform(ps.gen, "compositional")
 taxa_names(ps.rel) <- make.unique(tax_table(ps.rel)[,"Genus"])
 sample_data(ps.rel)$sample_ids <- sample_names(ps.rel)
@@ -87,6 +87,7 @@ gen_tab <- prepTaxaAbundanceData(ps.rel,
                                  prevalence_threshold=50/100)
 
 dim(gen_tab)
+
 ```
 
 ## Variables to test