From 88aebcb1e0d7fbfc942070040af9dd29db59a1f4 Mon Sep 17 00:00:00 2001
From: sdgamboa
Date: Mon, 2 Sep 2024 19:11:23 -0400
Subject: [PATCH] Add scml function for the dataset with spike-in bacteria

---
 DESCRIPTION                            |  4 +--
 NAMESPACE                              |  1 +
 NEWS.md                                |  5 +++
 R/scml.R                               | 48 ++++++++++++++++++++++++++
 man/MicrobiomeBenchmarkData.Rd         | 24 +++++++++++++
 man/scml.Rd                            | 28 +++++++++++++++
 tests/testthat/test-smcl.R             |  6 ++++
 vignettes/recalibrare_spikein_data.Rmd |  7 ++++
 8 files changed, 121 insertions(+), 2 deletions(-)
 create mode 100644 R/scml.R
 create mode 100644 man/scml.Rd
 create mode 100644 tests/testthat/test-smcl.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 63ec8c7..9dd1384 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: MicrobiomeBenchmarkData
 Title: Datasets for benchmarking in microbiome research
-Version: 1.7.0
+Version: 1.7.1
 Description: The MicrobiomeBenchmarkData package provides functionality to
     access microbiome datasets suitable for benchmarking. These datasets have
     some biological truth, which allows to have expected results for
@@ -58,6 +58,6 @@ URL: https://github.com/waldronlab/MicrobiomeBenchmarkData,
 BiocType: ExperimentData
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.3.2
 VignetteBuilder: knitr
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 3aa5ef7..4854e9a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(getBenchmarkData)
 export(removeCache)
+export(scml)
 import(SummarizedExperiment)
 importFrom(BiocFileCache,BiocFileCache)
 importFrom(BiocFileCache,bfcadd)
diff --git a/NEWS.md b/NEWS.md
index 70a7054..5a15a92 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
 # Changes in version 0.99.0 (2021-09-26)
 
 + Added a `NEWS.md` file to track changes to the package.
+
+# Changes in version 1.7.1 (2024-09-02)
+
++ Added a function (`scml`) for re-calibrating the "Stammler_2016_16S_spikein"
+dataset with SCML: spike-in-based calibration to total microbial load.
\ No newline at end of file
diff --git a/R/scml.R b/R/scml.R
new file mode 100644
index 0000000..42f2617
--- /dev/null
+++ b/R/scml.R
@@ -0,0 +1,48 @@
+#' SCML: spike-in-based calibration to total microbial load
+#'
+#' The \code{scml} function applies the spike-in-based calibration to total
+#' microbial load (SCML) method to the "Stammler_2016_16S_spikein" dataset.
+#'
+#' @param tse A TreeSummarizedExperiment from the \code{getBenchmarkData}
+#' function.
+#' @param bac A character. One of the following options:
+#' s = Salinibacter ruber (AF323500), r = Rhizobium radiobacter (AB247615),
+#' a = Alicyclobacillus acidiphilus (AB076660)
+#'
+#' @return A TreeSummarizedExperiment with SCML data instead of counts.
+#' @export
+#'
+#' @examples
+#' tse <- getBenchmarkData("Stammler_2016_16S_spikein", dryrun = FALSE)[[1]]
+#' tseSCML <- scml(tse, bac = "s")
+#'
+scml <- function(tse, bac = c("s", "r", "a")) {
+    bacLetter <- match.arg(bac)
+    bacNames <- c(
+        s = "AF323500XXXX", r = "AB247615XXXX", a = "AB076660XXXX"
+    )
+    bacFullNames <- c(
+        AF323500XXXX = "Salinibacter ruber (AF323500)",
+        AB247615XXXX = "Rhizobium radiobacter (AB247615)",
+        AB076660XXXX = "Alicyclobacillus acidiphilus (AB076660)"
+    )
+    bacName <- bacNames[[bacLetter]]
+
+    if (!(bacName %in% rownames(tse))) {
+        ## Pieces are pasted without a separator, so spaces are explicit.
+        stop(
+            "Feature ", bacName, " not found. ",
+            "Are you sure you're using the Stammler_2016_16S_spikein dataset?",
+            call. = FALSE
+        )
+    }
+    message("Re-calibrating counts with ", bacFullNames[[bacName]])
+    counts <- SummarizedExperiment::assay(tse, 1)
+    bacAb <- counts[bacName, ]
+    ## Size factor per sample: spike-in count relative to its mean.
+    sizeFactor <- bacAb / mean(bacAb)
+    ## Divide each column (sample) by its size factor, then round.
+    scmlData <- round(sweep(counts, 2, sizeFactor, "/"))
+    SummarizedExperiment::assay(tse) <- scmlData
+    tse
+}
diff --git a/man/MicrobiomeBenchmarkData.Rd b/man/MicrobiomeBenchmarkData.Rd
index 10d641e..44eb24b 100644
--- a/man/MicrobiomeBenchmarkData.Rd
+++ b/man/MicrobiomeBenchmarkData.Rd
@@ -2,9 +2,33 @@
 % Please edit documentation in R/MicrobiomeBenchmarkData-package.R
 \docType{package}
 \name{MicrobiomeBenchmarkData}
+\alias{MicrobiomeBenchmarkData-package}
 \alias{MicrobiomeBenchmarkData}
 \title{MicrobiomeBenchmarkData}
 \description{
 The \code{MicrobiomeBenchmarkData} provide functions for accessing various
 microbiome datasets with biological ground truth.
 }
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://github.com/waldronlab/MicrobiomeBenchmarkData}
+  \item \url{http://waldronlab.io/MicrobiomeBenchmarkData/}
+  \item Report bugs at \url{https://github.com/waldronlab/MicrobiomeBenchmarkData/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Samuel Gamboa \email{Samuel.Gamboa.Tuz@gmail.com} (\href{https://orcid.org/0000-0002-6863-7943}{ORCID})
+
+Authors:
+\itemize{
+  \item Levi Waldron (\href{https://orcid.org/0000-0003-2725-0694}{ORCID})
+}
+
+Other contributors:
+\itemize{
+  \item Marcel Ramos [contributor]
+}
+
+}
diff --git a/man/scml.Rd b/man/scml.Rd
new file mode 100644
index 0000000..b8f2b1e
--- /dev/null
+++ b/man/scml.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/scml.R
+\name{scml}
+\alias{scml}
+\title{SCML: spike-in-based calibration to total microbial load}
+\usage{
+scml(tse, bac = c("s", "r", "a"))
+}
+\arguments{
+\item{tse}{A TreeSummarizedExperiment from the \code{getBenchmarkData}
+function.}
+
+\item{bac}{A character. One of the following options:
+s = Salinibacter ruber (AF323500), r = Rhizobium radiobacter (AB247615),
+a = Alicyclobacillus acidiphilus (AB076660)}
+}
+\value{
+A TreeSummarizedExperiment with SCML data instead of counts.
+}
+\description{
+The \code{scml} function applies the spike-in-based calibration to total
+microbial load (SCML) method to the "Stammler_2016_16S_spikein" dataset.
+}
+\examples{
+tse <- getBenchmarkData("Stammler_2016_16S_spikein", dryrun = FALSE)[[1]]
+tseSCML <- scml(tse, bac = "s")
+
+}
diff --git a/tests/testthat/test-smcl.R b/tests/testthat/test-smcl.R
new file mode 100644
index 0000000..c50d43b
--- /dev/null
+++ b/tests/testthat/test-smcl.R
@@ -0,0 +1,6 @@
+test_that("scml works", {
+    tse <- suppressWarnings(getBenchmarkData("Stammler_2016_16S_spikein", dryrun = FALSE)[[1]])
+    expect_s4_class(scml(tse, bac = "s"), "TreeSummarizedExperiment")
+    expect_s4_class(scml(tse, bac = "r"), "TreeSummarizedExperiment")
+    expect_s4_class(scml(tse, bac = "a"), "TreeSummarizedExperiment")
+})
diff --git a/vignettes/recalibrare_spikein_data.Rmd b/vignettes/recalibrare_spikein_data.Rmd
index 2a9c45e..99d0f48 100644
--- a/vignettes/recalibrare_spikein_data.Rmd
+++ b/vignettes/recalibrare_spikein_data.Rmd
@@ -108,6 +108,13 @@ assay(tse) <- SCML_data
 tse
 ```
 
+## A more convenient way using the scml function included in the package:
+
+```{r}
+tse <- getBenchmarkData('Stammler_2016_16S_spikein', dryrun = FALSE)[[1]]
+tse <- scml(tse, bac = "s")
+```
+
 # Session information
 
 ```{r}