Skip to content

Commit

Permalink
pv added
Browse files Browse the repository at this point in the history
  • Loading branch information
microsud committed Nov 28, 2021
1 parent b500880 commit 8519b39
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 13 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: biomeStats
Type: Package
Title: Methods for carrying out association studies of the microbiome
Version: 00.00.04
Version: 00.00.05
Authors@R:
c(person(given = "Sudarshan", family = "Shetty", role = c("aut", "cre"),
email = "[email protected]",
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export(illustrate.ONE.association.BY.STRATUM)
export(prepSampleData)
export(prepTaxaAbundanceData)
export(show.missing.by.variable)
export(taxaPV)
export(test.ONE.association)
export(test.ONE.association.0)
import(graphics)
Expand All @@ -21,6 +22,7 @@ importFrom(coin,kruskal_test)
importFrom(coin,pvalue)
importFrom(coin,spearman_test)
importFrom(coin,statistic)
importFrom(dplyr,"%>%")
importFrom(dplyr,as_tibble)
importFrom(dplyr,distinct)
importFrom(dplyr,group_by)
Expand All @@ -35,7 +37,10 @@ importFrom(magrittr,"%>%")
importFrom(microbiome,abundances)
importFrom(microbiome,core_members)
importFrom(microbiome,meta)
importFrom(phyloseq,nsamples)
importFrom(phyloseq,prune_samples)
importFrom(phyloseq,sample_data)
importFrom(phyloseq,sample_names)
importFrom(readr,write_tsv)
importFrom(reshape2,melt)
importFrom(rstatix,add_significance)
Expand All @@ -48,7 +53,9 @@ importFrom(stats,as.formula)
importFrom(stats,cor)
importFrom(stats,na.omit)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(tibble,column_to_rownames)
importFrom(tibble,rownames_to_column)
importFrom(tidyr,pivot_wider)
importFrom(utils,combn)
14 changes: 4 additions & 10 deletions R/doAssociationAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,7 @@ doAssociationAnalysis <- function(data_for_testing,

for(f in (1:no_of_factors)){

#if(verbose==TRUE){
# message(paste0("Processing.. ", test.i[1], " vs ", test.i[2]))
#}

start.time <- Sys.time()

columns.f <- cbind(f,aux_columns)
#head(columns.f)
tests <- NULL
Expand All @@ -95,13 +90,11 @@ doAssociationAnalysis <- function(data_for_testing,
data_for_testing,
aux_TYPES,variables.of.interest)

#if(verbose==TRUE){
# message(paste0("Processing.. ", test.i[1], " vs ", test.i[2]))
#}

p.value.i <- as.numeric(test.i[3])
#p.value.i
if(is.na(p.value.i)){p.value.i <- 2; B.i <- 2*B}
if(is.na(p.value.i)){
p.value.i <- 2; B.i <- 2*B
}
if(p.value.i<1 & p.value.i>0){
B.i <- min(max(B,ceiling(400/((1-p.value.i)*p.value.i))),1000000*10*10*10)
}else{
Expand All @@ -119,6 +112,7 @@ doAssociationAnalysis <- function(data_for_testing,
CI.p.value.i <- paste("[",CI.p.value.i[1],",",CI.p.value.i[2],"]",collapse="")
}else{
#print(c(i,B.i))

tests.i <- test.ONE.association(as.vector(columns.f[i,]),
data_for_testing,aux_TYPES,
variables.of.interest,
Expand Down
144 changes: 144 additions & 0 deletions R/taxaPV.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#' Calculate Proportional Variability For Taxa
#'
#' @name taxaPV
#'
#' @details Proportional Variability of taxa using relative abundance.
#' The Proportional Variability is a non-parametric measure of variability.
#' \url{https://doi.org/10.1371/journal.pone.0084074}. Here, I have
#' incorporated subsampling approach to quantify Proportional Variability
#' in microbiome studies.
#'
#' @param x A phyloseq object
#'
#' @param subsample Logical. Default=TRUE.
#'
#' @param lower_conf Lower confidence. Default=0.025
#' @param upper_conf Lower confidence. Default=0.975
#' @param sampling_n Number of subsampling. Default=9
#' @param round_pv_vals Return PV values by rounding to certain value. Default=3
#' @examples
#' library(biomeUtils)
#' library(dplyr)
#' library(microbiome)
#' data("FuentesIliGutData")
#' # Check for control samples how variable are core taxa
#' ps1 <- filterSampleData(FuentesIliGutData, ILI == "C") %>%
#' microbiome::transform("compositional") %>%
#' core(0.01, 75/100)
#' taxa_fd <- taxaPV(ps1)
#' taxa_fd
#'
#' @return A tibble with taxa ids, taxonomic information,
#' two-group prevalence and fold change values.
#'
#' @author Original Author: Leo Lahti. Adapted by Sudarshan A. Shetty
#'
#' @references
#' Heath JP, Borowski P. (2013). Quantifying Proportional Variability.
#' \emph{PLoS ONE} 8(12): e84074.
#' \url{https://doi.org/10.1371/journal.pone.0084074}
#'
#' Heath J. (2006) Quantifying temporal variability in population abundances.
#' \emph{Oikos} 115, no. 3 (2006): 573-581.
#' \url{https://doi.org/10.1111/j.2006.0030-1299.15067.x}
#'
#' Shetty SA (2021). Utilities for microbiome analytics.
#' \url{https://github.com/RIVM-IIV-Microbiome/biomeUtils}
#'
#' @importFrom tibble rownames_to_column
#' @importFrom dplyr %>%
#'
#' @export

taxaPV <- function(x,
subsample = TRUE,
lower_conf=0.025,
upper_conf=0.975,
sampling_n =9,
round_pv_vals = 3){

if (class(x) != "phyloseq") {
stop("Input is not an object of phyloseq class")
}

if(subsample){
all.pv <- .taxa_pv(x)
sampled.pv <- .taxa_pv_subsampled(x,
lower_conf = lower_conf,
upper_conf = upper_conf,
sampling_n = sampling_n)
comb.pv <- cbind(all.pv, sampled.pv)
comb.pv <- round(comb.pv, digits = round_pv_vals)
comb.pv <- comb.pv %>%
tibble::rownames_to_column("FeatureID")
return(comb.pv)
}

all.pv <- .taxa_pv(x)
all.pv <- round(all.pv, digits = round_pv_vals)
all.pv <- all.pv %>%
tibble::rownames_to_column("FeatureID")
return(all.pv)
}


#' @importFrom phyloseq sample_names prune_samples nsamples
#' @importFrom stats quantile
.taxa_pv_subsampled <- function(x,
lower_conf = lower_conf,
upper_conf = upper_conf,
sampling_n = sampling_n){
rand_sams <- ps.sub <- txvp <- sx <- cis_df <- sxi <- NULL
s <- NULL
for (i in seq_len(sampling_n)) {
size_80 <- round(0.8*nsamples(x))
rand_sams <- sample(sample_names(x), size= size_80, replace = TRUE)
ps.sub <- prune_samples(sample_names(x) %in% rand_sams, x)
#ps.sub <- prune_taxa(taxa_sums(ps.sub) > 0, ps.sub)
txvp <- .taxa_pv(ps.sub)
#rownames(txvp) <- txvp$Taxa
s[[i]] <- txvp
}
sx <- suppressMessages(dplyr::bind_cols(s))
tx.pv.lci <- apply(sx, 1, function(x){
quantile(x,lower_conf,na.rm=TRUE)
})
tx.pv.uci <- apply(sx, 1, function(x){
quantile(x,upper_conf,na.rm=TRUE)
})

cis <- data.frame(LowerCI = tx.pv.lci,
UpperCI = tx.pv.uci)
return(cis)
}

#' @importFrom microbiome abundances
## Overall PV calculations
.taxa_pv <- function(x) {

sp_tab_tax <- NULL
# proportional variability index (PV)
sp_tab_tax <- t(abundances(x))
pv.d <- apply(sp_tab_tax, 2, .cal.pv)
pv.d <- data.frame(pv = pv.d)
return(pv.d)
}

# Modified PV Formula to account for sparsity
.cal.pv <- function(x){
k = (1/100)*mean(x) # get low pseudocount
pv_val_tax <- .pv.index(x + k)
return(pv_val_tax)
}

# Main PV Formula
#' @importFrom utils combn
.pv.index <- function (Z){
n = length(Z)
pairs = combn(Z,2)
min_z = apply(pairs,2, min)
max_z = apply(pairs,2, max)
z = 1- (min_z/max_z)
PV=2*sum(z)/(n*(n-1))
return(PV)
}
3 changes: 2 additions & 1 deletion R/test.ONE.association.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ test.ONE.association <- function(columns, data_for_testing, TYPES, variables.of.
if (is.element(type.1, c("binary", "categorical", "ordinal")) &
is.element(type.2, c("binary", "categorical", "ordinal")) & (tested == FALSE)) {
CHM.test <- coin::cmh_test(as.factor(response) ~ as.factor(treatment) | stratum,
data = response.by.treatment, distribution = approximate(nresample = B.i)
data = response.by.treatment,
distribution = approximate(nresample = B.i)
)
p.value <- as.numeric(pvalue(CHM.test))
if ((type.1 == "binary" & type.2 != "categorical") | (type.2 == "binary" & type.1 != "categorical")) {
Expand Down
70 changes: 70 additions & 0 deletions man/taxaPV.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion vignettes/biomestats.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ data("FuentesIliGutData")
ps <- subset_samples(FuentesIliGutData, ILI %in% c("C", "L1"))
ps.gen <- tax_glom(ps, "Genus")
#saveRDS(ps.gen, "data-raw/ps.gen.rds")
#ps.gen <- readRDS("../data-raw/ps.gen.rds")
ps.gen <- readRDS("../data-raw/ps.gen.rds")
ps.rel <- microbiome::transform(ps.gen, "compositional")
taxa_names(ps.rel) <- make.unique(tax_table(ps.rel)[,"Genus"])
sample_data(ps.rel)$sample_ids <- sample_names(ps.rel)
Expand Down Expand Up @@ -87,6 +87,7 @@ gen_tab <- prepTaxaAbundanceData(ps.rel,
prevalence_threshold=50/100)
dim(gen_tab)
```

## Variables to test
Expand Down

0 comments on commit 8519b39

Please sign in to comment.