From e94256c939b593fdf437bdb2ffa543f533512858 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 30 May 2024 11:59:19 -0400 Subject: [PATCH 1/8] discord server links --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7ae7ecf..d9ba0a0 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,14 @@ [![R-CMD-check](https://github.com/microbiomeDB/MicrobiomeDB/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/microbiomeDB/MicrobiomeDB/actions/workflows/R-CMD-check.yaml) + [![](https://dcbadge.limes.pink/api/server/DEjNbxgq?style=flat)](https://discord.gg/DEjNbxgq) # MicrobiomeDB + A suite of R packages containing all of the data from MicrobiomeDB.org, and tools for analyzing and visualizing the same. +If you have questions or comments of any kind, join us on our [Discord Community Server](https://discord.gg/DEjNbxgq)! We'd love to hear from you. + ## Installation Use the R package [remotes](https://cran.r-project.org/web/packages/remotes/index.html) to install MicrobiomeDB. 
From the R command prompt: From 9ffcf0cd07fe5191f70c7d1c8aad95f3f09e0280 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 4 Jun 2024 10:09:49 -0400 Subject: [PATCH 2/8] add TreeSummarizedExperiment and mia deps --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7b5c14b..1b802a0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,9 +15,11 @@ Imports: veupathUtils, Maaslin2, methods, + mia, microbiomeComputations, phyloseq, - purrr + purrr, + TreeSummarizedExperiment Remotes: microbiomeDB/veupathUtils, microbiomeDB/corGraph, From f1ec5d701dd4f281e5f9b0215ba7131e11b526f2 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 4 Jun 2024 10:09:57 -0400 Subject: [PATCH 3/8] draft user data import methods --- R/importers-MbioDataset.R | 285 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 R/importers-MbioDataset.R diff --git a/R/importers-MbioDataset.R b/R/importers-MbioDataset.R new file mode 100644 index 0000000..eec8dce --- /dev/null +++ b/R/importers-MbioDataset.R @@ -0,0 +1,285 @@ + +buildCollectionFromTreeSE <- function( + collectionName = list(assayDataName = NULL, rowDataColumnName = NULL), + rowData, + assayData, + normalizationMethod = c("TSS", "none"), + verbose = c(TRUE, FALSE) +) { + verbose <- veupathUtils::matchArg(verbose) + + assayDataName <- collectionName$assayDataName + rowDataColumnName <- collectionName$rowDataColumnName + + if (is.null(assayDataName) || is.null(rowDataColumnName)) { + stop("Must specify both assayDataName and rowDataColumnName as named elements of the collectionName list argument") + } + + assayDT <- as.data.frame.matrix(assayData, col.names = colnames(assayData), row.names = row.names(assayData)) + dt <- data.table::as.data.table(merge(assayDT, rowData[rowDataColumnName], by = 0)) + dt$Row.names <- NULL + + recordIDs <- names(dt)[names(dt) != rowDataColumnName] + dt <- dt[, lapply(.SD, sum, na.rm=TRUE), 
+ by=rowDataColumnName] + dt <- data.table::transpose(dt, make.names=rowDataColumnName) + + # if this does grow into other methods, the normalization step could be factored out probably + if (normalizationMethod == "TSS") { + dt <- dt / rowSums(dt) + } + + dt$recordIDs <- recordIDs + + recordIdColumn <- 'recordIDs' + ancestorIdColumns <- NULL + collectionName <- paste0(assayDataName, ": ", rowDataColumnName) + if (normalizationMethod != "none") { + collectionName <- paste0(collectionName, " (", normalizationMethod, " normalized)") + } + + collection <- Collection( + data = dt, + recordIdColumn = recordIdColumn, + ancestorIdColumns = ancestorIdColumns, + collectionName = collectionName + ) + + return(collection) +} + +#' Import TreeSummarizedExperiment +#' +#' Import data from TreeSummarizedExperiment to MbioDataset. +#' There is some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. +#' +#' @param data A TreeSummarizedExperiment +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". +#' @param keepRawValues Keep the raw assay values as well as the normalized values. 
+#' @param verbose Print messages +#' @return A MbioDataset +#' @export +importTreeSummarizedExperiment <- function(data, normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE)) { + + normalizationMethod <- veupathUtils::matchArg(normalizationMethod) + keepRawValues <- veupathUtils::matchArg(keepRawValues) + verbose <- veupathUtils::matchArg(verbose) + + if (keepRawValues == FALSE && normalizationMethod == "none") { + stop("keepRawValues must be TRUE when normalizationMethod is 'none'") + } + + # figure out what all assays we have, and what data is available per assay + # these will become collections in the MbioDataset + collectionsDTList <- lapply(names(data@assays), function(x) { + data.table::data.table(assayDataName = x, rowDataColumnName = row.names(data@assays[[x]])) + }) + collectionsDT <- purrr::reduce(collectionsDT, rbind) + + if (keepRawValues) { + # call buildCollectionFromTreeSE for each column of each assay data, w normalization 'none' + collectionsByAssayList <- apply(collectionsDT, 1, function(x) { + buildCollectionFromTreeSE( + collectionName = as.list(x, keep.names=TRUE), + rowData = data@rowData, + assayData = data@assays[[x$assayDataName]], + normalizationMethod = "none", + verbose = verbose + ) + }) + rawCollectionsList <- purrr::reduce(collectionsByAssayList, c) + } + + if (normalizationMethod != "none") { + collectionsByAssayList <- apply(collectionsDT, 1, function(x) { + buildCollectionFromTreeSE( + collectionName = as.list(x, keep.names=TRUE), + rowData = data@rowData, + assayData = data@assays[[x$assayDataName]], + normalizationMethod = normalizationMethod, + verbose = verbose + ) + }) + normalizedCollectionsList <- purrr::reduce(collectionsByAssayList, c) + } + + collectionsList <- c(rawCollectionsList, normalizedCollectionsList) + + ## TODO add some sort of check and set this conditionally + imputeZero <- FALSE + + # build and validate MbioDataset, colData becomes sampleMetadata + mbioDataset <- 
MbioDataset( + collections = collectionsList, + metadata = SampleMetadata(data@colData), + imputeZero = imputeZero) + + # return a MbioDataset + return(mbioDataset) +} + +## lean on miaverse to import biom, phyloseq, csv, etc +## TODO do these also needs args about relative abundances? id think so.. + +#' Import HUMAnN data +#' +#' Import data from HUMAnN results to MbioDataset. There is +#' some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. See \code{mia::importHUMAnN} +#' for documentation. +#' +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". +#' @param keepRawValues Keep the raw assay values as well as the normalized values. +#' @param verbose Print messages +#' @param ... Arguments to pass to mia::importHUMAnN +#' @return A MbioDataset +#' @export +#' @importFrom mia importHUMAnN +importHUMAnN <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + treeSE <- mia::importHUMAnN(...) + + mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) + return(mbioDataset) +} + +#' Import MetaPhlAn data +#' +#' Import data from MetaPhlAn results to MbioDataset. There is +#' some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. See \code{mia::importMetaPhlAn} +#' for documentation. +#' +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". 
+#' @param keepRawValues Keep the raw assay values as well as the normalized values. +#' @param verbose Print messages +#' @param ... Arguments to pass to mia::importMetaPhlAn +#' @return A MbioDataset +#' @export +#' @importFrom mia importMetaPhlAn +importMetaPhlAn <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + treeSE <- mia::importMetaPhlAn(...) + + mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) + return(mbioDataset) +} + +#' Import MOTHUR data +#' +#' Import data from MOTHUR results to MbioDataset. There is +#' some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. See \code{mia::importMothur} +#' for documentation. +#' +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". +#' @param keepRawValues Keep the raw assay values as well as the normalized values. +#' @param verbose Print messages +#' @param ... Arguments to pass to mia::importMothur +#' @return A MbioDataset +#' @export +#' @importFrom mia importMothur +importMothur <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + treeSE <- mia::importMothur(...) + + mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) + return(mbioDataset) +} + +#' Import QIIME2 data +#' +#' Import data from QIIME2 results to MbioDataset. There is +#' some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. 
See \code{mia::importQIIME2} +#' for documentation. +#' +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". +#' @param keepRawValues Keep the raw assay values as well as the normalized values. +#' @param verbose Print messages +#' @param ... Arguments to pass to mia::importQIIME2 +#' @return A MbioDataset +#' @export +#' @importFrom mia importQIIME2 +importQIIME2 <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + treeSE <- mia::importQIIME2(...) + + mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) + return(mbioDataset) +} + +#' Import BIOM data +#' +#' Import data from BIOM results to MbioDataset. There is +#' some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. See \code{mia::makeTreeSEFromBiom} +#' for documentation. +#' +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". +#' @param keepRawValues Keep the raw assay values as well as the normalized values. +#' @param verbose Print messages +#' @param ... Arguments to pass to mia::makeTreeSEFromBiom +#' @return A MbioDataset +#' @export +#' @importFrom mia makeTreeSEFromBiom +importBIOM <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + treeSE <- mia::makeTreeSEFromBiom(...) 
+ + mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) + return(mbioDataset) +} + +#' Import DADA2 data +#' +#' Import data from DADA2 results to MbioDataset. There is +#' some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. See \code{mia::makeTreeSEFromDADA2} +#' for documentation. +#' +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". +#' @param keepRawValues Keep the raw assay values as well as the normalized values. +#' @param verbose Print messages +#' @param ... Arguments to pass to mia::makeTreeSEFromDADA2 +#' @return A MbioDataset +#' @export +#' @importFrom mia makeTreeSEFromDADA2 +importDADA2 <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + treeSE <- mia::makeTreeSEFromDADA2(...) + + mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) + return(mbioDataset) +} + +#' Import Phyloseq data +#' +#' Import data from Phyloseq results to MbioDataset. There is +#' some loss of granularity in this process. It results +#' in a simpler and more performant object which is compliant +#' with the MicrobiomeDB infrastructure. See \code{mia::makeTreeSEFromPhyloseq} +#' for documentation. +#' +#' @param normalizationMethod Normalization method to use on the assay data. Options are "none" and "TSS". +#' Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS". +#' @param keepRawValues Keep the raw assay values as well as the normalized values. 
+#' @param verbose Print messages +#' @param ... Arguments to pass to mia::makeTreeSEFromPhyloseq +#' @return A MbioDataset +#' @export +#' @importFrom mia makeTreeSEFromPhyloseq +importPhyloseq <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + treeSE <- mia::makeTreeSEFromPhyloseq(...) + + mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) + return(mbioDataset) +} \ No newline at end of file From 3c18187c0169fa3142247b592c1aea1caf5e6d44 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 5 Jun 2024 11:33:39 -0400 Subject: [PATCH 4/8] adding some data importing tests --- R/importers-MbioDataset.R | 110 +++++++---- tests/testthat/test-importers-MbioDataset.R | 201 ++++++++++++++++++++ 2 files changed, 271 insertions(+), 40 deletions(-) create mode 100644 tests/testthat/test-importers-MbioDataset.R diff --git a/R/importers-MbioDataset.R b/R/importers-MbioDataset.R index eec8dce..3fb268e 100644 --- a/R/importers-MbioDataset.R +++ b/R/importers-MbioDataset.R @@ -1,7 +1,7 @@ buildCollectionFromTreeSE <- function( collectionName = list(assayDataName = NULL, rowDataColumnName = NULL), - rowData, + rowData, # this is a data.frame representing the row data/ tree assayData, normalizationMethod = c("TSS", "none"), verbose = c(TRUE, FALSE) @@ -31,17 +31,17 @@ buildCollectionFromTreeSE <- function( dt$recordIDs <- recordIDs recordIdColumn <- 'recordIDs' - ancestorIdColumns <- NULL + ancestorIdColumns <- character(0) collectionName <- paste0(assayDataName, ": ", rowDataColumnName) if (normalizationMethod != "none") { collectionName <- paste0(collectionName, " (", normalizationMethod, " normalized)") } - collection <- Collection( + collection <- veupathUtils::Collection( data = dt, recordIdColumn = recordIdColumn, ancestorIdColumns = ancestorIdColumns, - collectionName = collectionName + name = collectionName ) 
return(collection) @@ -60,6 +60,10 @@ buildCollectionFromTreeSE <- function( #' @param keepRawValues Keep the raw assay values as well as the normalized values. #' @param verbose Print messages #' @return A MbioDataset +#' @importFrom purrr reduce +#' @importFrom data.table data.table +#' @importFrom SummarizedExperiment rowData +#' @rdname importTreeSummarizedExperiment #' @export importTreeSummarizedExperiment <- function(data, normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE)) { @@ -67,59 +71,84 @@ importTreeSummarizedExperiment <- function(data, normalizationMethod = c("TSS", keepRawValues <- veupathUtils::matchArg(keepRawValues) verbose <- veupathUtils::matchArg(verbose) + if (!inherits(data, "SummarizedExperiment")) { + stop("data must be or extend a SummarizedExperiment") + } + if (keepRawValues == FALSE && normalizationMethod == "none") { stop("keepRawValues must be TRUE when normalizationMethod is 'none'") } # figure out what all assays we have, and what data is available per assay # these will become collections in the MbioDataset - collectionsDTList <- lapply(names(data@assays), function(x) { - data.table::data.table(assayDataName = x, rowDataColumnName = row.names(data@assays[[x]])) + # TODO it looks like rowData is expected to be same across all assays, + # which is odd to me, but probably means we can simplify this logic + collectionsDTList <- lapply(names(data@assays@data), function(x) { + data.table::data.table(assayDataName = x, rowDataColumnName = colnames(SummarizedExperiment::rowData(data))) }) - collectionsDT <- purrr::reduce(collectionsDT, rbind) - - if (keepRawValues) { - # call buildCollectionFromTreeSE for each column of each assay data, w normalization 'none' - collectionsByAssayList <- apply(collectionsDT, 1, function(x) { - buildCollectionFromTreeSE( - collectionName = as.list(x, keep.names=TRUE), - rowData = data@rowData, - assayData = data@assays[[x$assayDataName]], - normalizationMethod = 
"none", - verbose = verbose - ) - }) - rawCollectionsList <- purrr::reduce(collectionsByAssayList, c) - } - - if (normalizationMethod != "none") { - collectionsByAssayList <- apply(collectionsDT, 1, function(x) { - buildCollectionFromTreeSE( - collectionName = as.list(x, keep.names=TRUE), - rowData = data@rowData, - assayData = data@assays[[x$assayDataName]], - normalizationMethod = normalizationMethod, - verbose = verbose - ) - }) - normalizedCollectionsList <- purrr::reduce(collectionsByAssayList, c) + collectionsDT <- purrr::reduce(collectionsDTList, rbind) + + if (nrow(collectionsDT) != 0) { + rawCollectionsList <- list() + if (keepRawValues) { + # call buildCollectionFromTreeSE for each column of each assay data, w normalization 'none' + collectionsByAssayList <- apply(collectionsDT, 1, function(x) { + collectionName = as.list(x, keep.names=TRUE); + + buildCollectionFromTreeSE( + collectionName = collectionName, + rowData = as.data.frame(SummarizedExperiment::rowData(data)), + assayData = data@assays@data[[collectionName$assayDataName]], + normalizationMethod = "none", + verbose = verbose + ) + }) + rawCollectionsList <- purrr::reduce(collectionsByAssayList, c) + } + + normalizedCollectionsList <- list() + if (normalizationMethod != "none") { + collectionsByAssayList <- apply(collectionsDT, 1, function(x) { + collectionName = as.list(x, keep.names=TRUE); + + buildCollectionFromTreeSE( + collectionName = collectionName, + rowData = as.data.frame(SummarizedExperiment::rowData(data)), + assayData = data@assays@data[[collectionName$assayDataName]], + normalizationMethod = normalizationMethod, + verbose = verbose + ) + }) + normalizedCollectionsList <- purrr::reduce(collectionsByAssayList, c) + } + + collectionsList <- c(rawCollectionsList, normalizedCollectionsList) + } else { + collectionsList <- veupathUtils::Collections() } - collectionsList <- c(rawCollectionsList, normalizedCollectionsList) - - ## TODO add some sort of check and set this conditionally - 
imputeZero <- FALSE # build and validate MbioDataset, colData becomes sampleMetadata + colData <- SummarizedExperiment::colData(data) + metadataDT <- data.table::data.table() + if (!!length(colData)) { + metadataDT <- data.table::as.data.table(SummarizedExperiment::colData(data)) + metadataDT$recordIDs <- rownames(SummarizedExperiment::colData(data)) + } + if (length(metadataDT) == 1) metadataDT <- data.table::data.table() + mbioDataset <- MbioDataset( collections = collectionsList, - metadata = SampleMetadata(data@colData), - imputeZero = imputeZero) + metadata = SampleMetadata(data = metadataDT, recordIdColumn = "recordIDs") + ) # return a MbioDataset return(mbioDataset) } +#' @rdname importTreeSummarizedExperiment +importTreeSE <- importTreeSummarizedExperiment + ## lean on miaverse to import biom, phyloseq, csv, etc ## TODO do these also needs args about relative abundances? id think so.. @@ -232,7 +261,8 @@ importQIIME2 <- function(normalizationMethod = c("TSS", "none"), keepRawValues = #' @export #' @importFrom mia makeTreeSEFromBiom importBIOM <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { - treeSE <- mia::makeTreeSEFromBiom(...) + biom <- biomformat::read_biom(...) 
+ treeSE <- mia::makeTreeSEFromBiom(obj=biom) mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) return(mbioDataset) diff --git a/tests/testthat/test-importers-MbioDataset.R b/tests/testthat/test-importers-MbioDataset.R new file mode 100644 index 0000000..04a4ef2 --- /dev/null +++ b/tests/testthat/test-importers-MbioDataset.R @@ -0,0 +1,201 @@ +test_that("buildCollectionFromTreeSE works", { + # should return a Collection, and validate the normalization is accurate + + # use simple mock data for this + # treeSE should have matrix class for assay data and data.frame class for row data + assay_data <- rbind(rep(0, 4), matrix(1:20, nrow = 5)) + colnames(assay_data) <- paste0("sample", 1:4) + rownames(assay_data) <- paste("entity", seq_len(6), sep = "") + + row_data <- data.frame(Kingdom = "A", + Phylum = rep(c("B1", "B2"), c(2, 4)), + Class = rep(c("C1", "C2", "C3"), each = 2), + OTU = paste0("D", 1:6), + row.names = rownames(assay_data), + stringsAsFactors = FALSE) + + # no normalization + collectionRaw <- buildCollectionFromTreeSE( + collectionName = list(assayDataName = "test", rowDataColumnName = "OTU"), + rowData = row_data, + assayData = assay_data, + normalizationMethod = "none", + verbose = TRUE + ) + + expect_equal(inherits(collectionRaw, "Collection"), TRUE) + expect_equal(collectionRaw@name, "test: OTU") + expect_equal(length(collectionRaw@data), 7) # 6 OTUs + 1 recordIDs + + # TSS normalized + collectionNormalized <- buildCollectionFromTreeSE( + collectionName = list(assayDataName = "test", rowDataColumnName = "OTU"), + rowData = row_data, + assayData = assay_data, + normalizationMethod = "TSS", + verbose = TRUE + ) + + expect_equal(inherits(collectionNormalized, "Collection"), TRUE) + expect_equal(collectionNormalized@name, "test: OTU (TSS normalized)") + expect_equal(length(collectionNormalized@data), 7) + expect_equal(all(rowSums(collectionNormalized@data[, 
-"recordIDs"]) == 1), TRUE) + + # try with Class, make sure its aggregating OTU to the Class level + collectionClass <- buildCollectionFromTreeSE( + collectionName = list(assayDataName = "test", rowDataColumnName = "Class"), + rowData = row_data, + assayData = assay_data, + normalizationMethod = "TSS", + verbose = TRUE + ) + + expect_equal(inherits(collectionClass, "Collection"), TRUE) + expect_equal(collectionClass@name, "test: Class (TSS normalized)") + expect_equal(length(collectionClass@data), 4) + expect_equal(all(rowSums(collectionClass@data[, -"recordIDs"]) == 1), TRUE) +}) + +test_that("we can get an MbioDataset from a TreeSummarizedExperiment", { + data(GlobalPatterns, package="mia") + tse <- GlobalPatterns + + # no normalization, with raw values + mbioDataset <- importTreeSummarizedExperiment(tse, normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE) + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal("counts: Genus" %in% getCollectionNames(mbioDataset), TRUE) + expect_equal("counts: Genus (TSS normalized)" %in% getCollectionNames(mbioDataset), FALSE) + expect_equal(length(getCollection(mbioDataset, "counts: Genus")@data) > 1, TRUE) + + # TSS normalization, no raw values + mbioDataset <- importTreeSummarizedExperiment(tse, normalizationMethod = "TSS", keepRawValues = FALSE, verbose = TRUE) + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal("counts: Genus (TSS normalized)" %in% getCollectionNames(mbioDataset), TRUE) + expect_equal("counts: Genus" %in% getCollectionNames(mbioDataset), FALSE) + expect_equal(length(getCollection(mbioDataset, "counts: Genus (TSS normalized)")@data) > 1, TRUE) + + # TSS normalization, with raw values + mbioDataset <- importTreeSummarizedExperiment(tse, normalizationMethod = "TSS", keepRawValues = TRUE, verbose = TRUE) + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal("counts: Genus (TSS normalized)" %in% getCollectionNames(mbioDataset), TRUE) + 
expect_equal("counts: Genus" %in% getCollectionNames(mbioDataset), TRUE) + expect_equal(length(getCollection(mbioDataset, "counts: Genus (TSS normalized)")@data) > 1, TRUE) + expect_equal(length(getCollection(mbioDataset, "counts: Genus")@data) > 1, TRUE) + + # no normalization, no raw values (should err) + expect_error(importTreeSummarizedExperiment(tse, normalizationMethod = "none", keepRawValues = FALSE, verbose = TRUE)) + + # not a summarized experiment + expect_error(importTreeSummarizedExperiment(data.frame(), normalizationMethod = "none", keepRawValues = FALSE, verbose = TRUE)) + +}) + +# most of the important testing is in mia, so +# we can test we get the right class back and its populated.. +test_that("the miaverse wrappers work", { + + # humann + file_path <- system.file("extdata", "humann_output.tsv", package = "mia") + + mbioDataset <- importHUMAnN(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, file_path) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) + + # mothur + counts <- system.file("extdata", "mothur_example.shared", package = "mia") + taxa <- system.file("extdata", "mothur_example.cons.taxonomy", package = "mia") + taxa2 <- system.file("extdata", "mothur_example.taxonomy", package = "mia") + meta <- system.file("extdata", "mothur_example.design", package = "mia") + + mbioDataset <- importMothur(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, counts) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal(length(getCollectionNames(mbioDataset)) > 0, FALSE) # no real data, needs taxa file too + + mbioDataset <- importMothur(normalizationMethod = "none", keepRawValues = TRUE, verbose = 
TRUE, counts, taxa) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) # this one should work but not be interesting without meta + expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) + + mbioDataset <- importMothur(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, counts, taxa2) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) # this one should work but not be interesting without meta + expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) + + mbioDataset <- importMothur(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, counts, taxa, meta) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) # this is the one we want really.. 
+ expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) + + # qiime2 + featureTableFile <- system.file("extdata", "table.qza", package = "mia") + taxonomyTableFile <- system.file("extdata", "taxonomy.qza", package = "mia") + + mbioDataset <- importQIIME2(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, featureTableFile, taxonomyTableFile) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) + + # biom + skip_if_not(require("biomformat", quietly = TRUE)) + rich_dense_file = system.file("extdata", "rich_dense_otu_table.biom", + package = "biomformat") + + mbioDataset <- importBIOM(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, rich_dense_file) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) + + # dada2 + skip_if_not_installed("dada2") + fnF <- system.file("extdata", "sam1F.fastq.gz", package="dada2") + fnR = system.file("extdata", "sam1R.fastq.gz", package="dada2") + dadaF <- dada2::dada(fnF, selfConsist=TRUE) + dadaR <- dada2::dada(fnR, selfConsist=TRUE) + + mbioDataset <- importDADA2(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, dadaF, fnF, dadaR, fnR) + + 
expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) + + # phyloseq + data(GlobalPatterns, package="phyloseq") + + mbioDataset <- importTreeSummarizedExperiment(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, GlobalPatterns) + + expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) + expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) + aCollectionName <- getCollectionNames(mbioDataset)[1] + aCollection <- getCollection(mbioDataset, aCollectionName) + expect_equal(inherits(aCollection, "Collection"), TRUE) + expect_equal(length(aCollection@data) > 0, TRUE) +}) \ No newline at end of file From 30df8b214e35add07e886c998b250b6bdeedaafc Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 5 Jun 2024 12:29:59 -0400 Subject: [PATCH 5/8] mia and phyloseq are now suggested rather than imported --- DESCRIPTION | 8 +++++--- R/importers-MbioDataset.R | 17 +++++++++++++++++ R/methods-MbioDataset.R | 1 + R/utils.R | 6 ++++++ 4 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 R/utils.R diff --git a/DESCRIPTION b/DESCRIPTION index 1b802a0..8e719ef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,9 +15,7 @@ Imports: veupathUtils, Maaslin2, methods, - mia, microbiomeComputations, - phyloseq, purrr, TreeSummarizedExperiment Remotes: @@ -38,6 +36,10 @@ Suggests: microbiomeData, knitr, rmarkdown, - tidyverse + tidyverse, + mia, + dada2, + biomformat, + phyloseq Config/testthat/edition: 3 VignetteBuilder: knitr diff --git a/R/importers-MbioDataset.R b/R/importers-MbioDataset.R index 3fb268e..4a923a0 100644 --- a/R/importers-MbioDataset.R +++ b/R/importers-MbioDataset.R @@ -169,6 +169,8 @@ importTreeSE <- importTreeSummarizedExperiment #' 
@export #' @importFrom mia importHUMAnN importHUMAnN <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + .require_package("mia") + treeSE <- mia::importHUMAnN(...) mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) @@ -192,6 +194,8 @@ importHUMAnN <- function(normalizationMethod = c("TSS", "none"), keepRawValues = #' @export #' @importFrom mia importMetaPhlAn importMetaPhlAn <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + .require_package("mia") + treeSE <- mia::importMetaPhlAn(...) mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) @@ -215,6 +219,8 @@ importMetaPhlAn <- function(normalizationMethod = c("TSS", "none"), keepRawValue #' @export #' @importFrom mia importMothur importMothur <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + .require_package("mia") + treeSE <- mia::importMothur(...) mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) @@ -238,6 +244,8 @@ importMothur <- function(normalizationMethod = c("TSS", "none"), keepRawValues = #' @export #' @importFrom mia importQIIME2 importQIIME2 <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + .require_package("mia") + treeSE <- mia::importQIIME2(...) 
mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) @@ -261,6 +269,9 @@ importQIIME2 <- function(normalizationMethod = c("TSS", "none"), keepRawValues = #' @export #' @importFrom mia makeTreeSEFromBiom importBIOM <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + .require_package("mia") + .require_package("biomformat") + biom <- biomformat::read_biom(...) treeSE <- mia::makeTreeSEFromBiom(obj=biom) @@ -285,6 +296,9 @@ importBIOM <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c #' @export #' @importFrom mia makeTreeSEFromDADA2 importDADA2 <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + .require_package("mia") + .require_package("dada2") + treeSE <- mia::makeTreeSEFromDADA2(...) mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) @@ -308,6 +322,9 @@ importDADA2 <- function(normalizationMethod = c("TSS", "none"), keepRawValues = #' @export #' @importFrom mia makeTreeSEFromPhyloseq importPhyloseq <- function(normalizationMethod = c("TSS", "none"), keepRawValues = c(TRUE, FALSE), verbose = c(TRUE, FALSE), ...) { + .require_package("mia") + .require_package("phyloseq") + treeSE <- mia::makeTreeSEFromPhyloseq(...) 
mbioDataset <- importTreeSummarizedExperiment(treeSE, normalizationMethod = normalizationMethod, keepRawValues = keepRawValues, verbose = verbose) diff --git a/R/methods-MbioDataset.R b/R/methods-MbioDataset.R index 340d2d7..18722b2 100644 --- a/R/methods-MbioDataset.R +++ b/R/methods-MbioDataset.R @@ -259,6 +259,7 @@ setMethod("getCollection", "MbioDataset", function(object, collectionName = char ) } } else if (format == "phyloseq") { + .require_package("phyloseq") sampleNames <- collectionDT[[collection@recordIdColumn]] keepCols <- names(collectionDT)[! names(collectionDT) %in% collectionIdColumns] diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..affc9b4 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,6 @@ +.require_package <- function(pkg){ + if(!requireNamespace(pkg, quietly = TRUE)){ + stop("'",pkg,"' package not found. Please install the '",pkg,"' package ", + "to use this feature.", call. = FALSE) + } +} \ No newline at end of file From a06589fc8a638af9e8bf02dfd298dd55bfcfd668 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 5 Jun 2024 12:30:12 -0400 Subject: [PATCH 6/8] fix up tests for phyloseq importer --- tests/testthat/test-importers-MbioDataset.R | 40 ++++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-importers-MbioDataset.R b/tests/testthat/test-importers-MbioDataset.R index 04a4ef2..cc91c0c 100644 --- a/tests/testthat/test-importers-MbioDataset.R +++ b/tests/testthat/test-importers-MbioDataset.R @@ -92,9 +92,10 @@ test_that("we can get an MbioDataset from a TreeSummarizedExperiment", { # most of the important testing is in mia, so # we can test we get the right class back and its populated.. 
-test_that("the miaverse wrappers work", { +# TODO add instructions to readme for installing mia, bc of the system level dep +test_that("the humann miaverse wrapper works", { + skip_if_not_installed("mia") - # humann file_path <- system.file("extdata", "humann_output.tsv", package = "mia") mbioDataset <- importHUMAnN(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, file_path) @@ -106,7 +107,11 @@ test_that("the miaverse wrappers work", { expect_equal(inherits(aCollection, "Collection"), TRUE) expect_equal(length(aCollection@data) > 0, TRUE) - # mothur +}) + +test_that("the mothur miaverse wrapper works", { + skip_if_not_installed("mia") + counts <- system.file("extdata", "mothur_example.shared", package = "mia") taxa <- system.file("extdata", "mothur_example.cons.taxonomy", package = "mia") taxa2 <- system.file("extdata", "mothur_example.taxonomy", package = "mia") @@ -144,7 +149,11 @@ test_that("the miaverse wrappers work", { expect_equal(inherits(aCollection, "Collection"), TRUE) expect_equal(length(aCollection@data) > 0, TRUE) - # qiime2 +}) + +test_that("the qiime2 miaverse wrapper works", { + skip_if_not_installed("mia") + featureTableFile <- system.file("extdata", "table.qza", package = "mia") taxonomyTableFile <- system.file("extdata", "taxonomy.qza", package = "mia") @@ -157,8 +166,12 @@ test_that("the miaverse wrappers work", { expect_equal(inherits(aCollection, "Collection"), TRUE) expect_equal(length(aCollection@data) > 0, TRUE) - # biom - skip_if_not(require("biomformat", quietly = TRUE)) +}) + +test_that("the biom miaverse wrapper works", { + skip_if_not_installed("mia") + skip_if_not_installed("biomformat") + rich_dense_file = system.file("extdata", "rich_dense_otu_table.biom", package = "biomformat") @@ -171,8 +184,12 @@ test_that("the miaverse wrappers work", { expect_equal(inherits(aCollection, "Collection"), TRUE) expect_equal(length(aCollection@data) > 0, TRUE) - # dada2 +}) + +test_that("the dada2 miaverse wrapper works", { + 
skip_if_not_installed("mia") skip_if_not_installed("dada2") + fnF <- system.file("extdata", "sam1F.fastq.gz", package="dada2") fnR = system.file("extdata", "sam1R.fastq.gz", package="dada2") dadaF <- dada2::dada(fnF, selfConsist=TRUE) @@ -187,10 +204,15 @@ test_that("the miaverse wrappers work", { expect_equal(inherits(aCollection, "Collection"), TRUE) expect_equal(length(aCollection@data) > 0, TRUE) - # phyloseq +}) + +test_that("the phyloseq miaverse wrapper works", { + skip_if_not_installed("mia") + skip_if_not_installed("phyloseq") + data(GlobalPatterns, package="phyloseq") - mbioDataset <- importTreeSummarizedExperiment(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, GlobalPatterns) + mbioDataset <- importPhyloseq(normalizationMethod = "none", keepRawValues = TRUE, verbose = TRUE, GlobalPatterns) expect_equal(inherits(mbioDataset, "MbioDataset"), TRUE) expect_equal(length(getCollectionNames(mbioDataset)) > 0, TRUE) From a4b62830c6c5d34192daa58188f665b9c4685914 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 5 Jun 2024 12:31:36 -0400 Subject: [PATCH 7/8] update generated docs --- NAMESPACE | 17 ++++++++++++ man/importBIOM.Rd | 33 ++++++++++++++++++++++ man/importDADA2.Rd | 33 ++++++++++++++++++++++ man/importHUMAnN.Rd | 33 ++++++++++++++++++++++ man/importMetaPhlAn.Rd | 33 ++++++++++++++++++++++ man/importMothur.Rd | 33 ++++++++++++++++++++++ man/importPhyloseq.Rd | 33 ++++++++++++++++++++++ man/importQIIME2.Rd | 33 ++++++++++++++++++++++ man/importTreeSummarizedExperiment.Rd | 40 +++++++++++++++++++++++++++ 9 files changed, 288 insertions(+) create mode 100644 man/importBIOM.Rd create mode 100644 man/importDADA2.Rd create mode 100644 man/importHUMAnN.Rd create mode 100644 man/importMetaPhlAn.Rd create mode 100644 man/importMothur.Rd create mode 100644 man/importPhyloseq.Rd create mode 100644 man/importQIIME2.Rd create mode 100644 man/importTreeSummarizedExperiment.Rd diff --git a/NAMESPACE b/NAMESPACE index 2870b77..659be0c 100644 
--- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,14 @@ export(getCollection) export(getComputeResult) export(getComputeResultWithMetadata) export(getVariables) +export(importBIOM) +export(importDADA2) +export(importHUMAnN) +export(importMetaPhlAn) +export(importMothur) +export(importPhyloseq) +export(importQIIME2) +export(importTreeSummarizedExperiment) export(rankedAbundance) export(selfCorrelation) export(updateCollectionName) @@ -23,9 +31,18 @@ exportMethods(getSampleMetadata) import(data.table) importFrom(DESeq2,DESeqDataSetFromMatrix) importFrom(Maaslin2,Maaslin2) +importFrom(SummarizedExperiment,rowData) importFrom(corGraph,bipartiteNetwork) importFrom(corGraph,unipartiteNetwork) +importFrom(data.table,data.table) importFrom(igraph,graph_from_data_frame) +importFrom(mia,importHUMAnN) +importFrom(mia,importMetaPhlAn) +importFrom(mia,importMothur) +importFrom(mia,importQIIME2) +importFrom(mia,makeTreeSEFromBiom) +importFrom(mia,makeTreeSEFromDADA2) +importFrom(mia,makeTreeSEFromPhyloseq) importFrom(microbiomeComputations,AbsoluteAbundanceData) importFrom(microbiomeComputations,AbundanceData) importFrom(microbiomeComputations,Comparator) diff --git a/man/importBIOM.Rd b/man/importBIOM.Rd new file mode 100644 index 0000000..1479d9e --- /dev/null +++ b/man/importBIOM.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importBIOM} +\alias{importBIOM} +\title{Import BIOM data} +\usage{ +importBIOM( + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE), + ... +) +} +\arguments{ +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. 
Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} + +\item{...}{Arguments to pass to biomformat::read_biom} +} +\value{ +A MbioDataset +} +\description{ +Import data from BIOM results to MbioDataset. There is +some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. See \code{mia::makeTreeSEFromBiom} +for documentation. +} diff --git a/man/importDADA2.Rd b/man/importDADA2.Rd new file mode 100644 index 0000000..4bcb299 --- /dev/null +++ b/man/importDADA2.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importDADA2} +\alias{importDADA2} +\title{Import DADA2 data} +\usage{ +importDADA2( + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE), + ... +) +} +\arguments{ +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} + +\item{...}{Arguments to pass to mia::makeTreeSEFromDADA2} +} +\value{ +A MbioDataset +} +\description{ +Import data from DADA2 results to MbioDataset. There is +some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. See \code{mia::makeTreeSEFromDADA2} +for documentation. 
+} diff --git a/man/importHUMAnN.Rd b/man/importHUMAnN.Rd new file mode 100644 index 0000000..23df1c8 --- /dev/null +++ b/man/importHUMAnN.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importHUMAnN} +\alias{importHUMAnN} +\title{Import HUMAnN data} +\usage{ +importHUMAnN( + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE), + ... +) +} +\arguments{ +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} + +\item{...}{Arguments to pass to mia::importHUMAnN} +} +\value{ +A MbioDataset +} +\description{ +Import data from HUMAnN results to MbioDataset. There is +some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. See \code{mia::importHUMAnN} +for documentation. +} diff --git a/man/importMetaPhlAn.Rd b/man/importMetaPhlAn.Rd new file mode 100644 index 0000000..379da7d --- /dev/null +++ b/man/importMetaPhlAn.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importMetaPhlAn} +\alias{importMetaPhlAn} +\title{Import MetaPhlAn data} +\usage{ +importMetaPhlAn( + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE), + ... +) +} +\arguments{ +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. 
Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} + +\item{...}{Arguments to pass to mia::importMetaPhlAn} +} +\value{ +A MbioDataset +} +\description{ +Import data from MetaPhlAn results to MbioDataset. There is +some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. See \code{mia::importMetaPhlAn} +for documentation. +} diff --git a/man/importMothur.Rd b/man/importMothur.Rd new file mode 100644 index 0000000..102769f --- /dev/null +++ b/man/importMothur.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importMothur} +\alias{importMothur} +\title{Import MOTHUR data} +\usage{ +importMothur( + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE), + ... +) +} +\arguments{ +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} + +\item{...}{Arguments to pass to mia::importMothur} +} +\value{ +A MbioDataset +} +\description{ +Import data from MOTHUR results to MbioDataset. There is +some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. See \code{mia::importMothur} +for documentation. 
+} diff --git a/man/importPhyloseq.Rd b/man/importPhyloseq.Rd new file mode 100644 index 0000000..7d3c9ee --- /dev/null +++ b/man/importPhyloseq.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importPhyloseq} +\alias{importPhyloseq} +\title{Import Phyloseq data} +\usage{ +importPhyloseq( + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE), + ... +) +} +\arguments{ +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} + +\item{...}{Arguments to pass to mia::makeTreeSEFromPhyloseq} +} +\value{ +A MbioDataset +} +\description{ +Import data from Phyloseq results to MbioDataset. There is +some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. See \code{mia::makeTreeSEFromPhyloseq} +for documentation. +} diff --git a/man/importQIIME2.Rd b/man/importQIIME2.Rd new file mode 100644 index 0000000..d703154 --- /dev/null +++ b/man/importQIIME2.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importQIIME2} +\alias{importQIIME2} +\title{Import QIIME2 data} +\usage{ +importQIIME2( + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE), + ... +) +} +\arguments{ +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. 
Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} + +\item{...}{Arguments to pass to mia::importQIIME2} +} +\value{ +A MbioDataset +} +\description{ +Import data from QIIME2 results to MbioDataset. There is +some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. See \code{mia::importQIIME2} +for documentation. +} diff --git a/man/importTreeSummarizedExperiment.Rd b/man/importTreeSummarizedExperiment.Rd new file mode 100644 index 0000000..5497678 --- /dev/null +++ b/man/importTreeSummarizedExperiment.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importers-MbioDataset.R +\name{importTreeSummarizedExperiment} +\alias{importTreeSummarizedExperiment} +\alias{importTreeSE} +\title{Import TreeSummarizedExperiment} +\usage{ +importTreeSummarizedExperiment( + data, + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE) +) + +importTreeSE( + data, + normalizationMethod = c("TSS", "none"), + keepRawValues = c(TRUE, FALSE), + verbose = c(TRUE, FALSE) +) +} +\arguments{ +\item{data}{A TreeSummarizedExperiment} + +\item{normalizationMethod}{Normalization method to use on the assay data. Options are "none" and "TSS". +Applying TSS normalization to absolute taxonomic abundances produces relative taxonomic abundances. Default is "TSS".} + +\item{keepRawValues}{Keep the raw assay values as well as the normalized values.} + +\item{verbose}{Print messages} +} +\value{ +A MbioDataset +} +\description{ +Import data from TreeSummarizedExperiment to MbioDataset. +There is some loss of granularity in this process. It results +in a simpler and more performant object which is compliant +with the MicrobiomeDB infrastructure. 
+} From 8d5a4c41c8aa7a04ce23c05b76e402b34b24dd32 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 5 Jun 2024 12:47:38 -0400 Subject: [PATCH 8/8] update readme --- README.md | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d9ba0a0..7684fbc 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,15 @@ # MicrobiomeDB -A suite of R packages containing all of the data from MicrobiomeDB.org, and tools for analyzing and visualizing the same. +A suite of R packages containing all of the data from MicrobiomeDB.org, and tools for analyzing and visualizing the same. It also supports +importing data from the following formats: + - TreeSummarizedExperiment + - phyloseq + - BIOM + - QIIME2 output files + - Mothur output files + - dada2 output files + - HUMAnN output files If you have questions or comments of any kind, join us on our [Discord Community Server](https://discord.gg/DEjNbxgq)! We'd love to hear from you. @@ -13,12 +21,41 @@ If you have questions or comments of any kind, join us on our [Discord Community Use the R package [remotes](https://cran.r-project.org/web/packages/remotes/index.html) to install MicrobiomeDB. From the R command prompt: -```R +```{R} remotes::install_github('microbiomeDB/MicrobiomeDB') ``` +If you would like to import data from any format other than TreeSummarizedExperiment, you will also need to install [mia](https://bioconductor.org/packages/mia) from [Bioconductor](https://bioconductor.org) with: + +```{R} +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +BiocManager::install("mia") +``` + +Please note that Linux and macOS with source-level installations require the 'gsl' system dependency for `mia`. 
+ +On Debian or Ubuntu + +``` +sudo apt-get install -y libgsl-dev +``` + +On Fedora, CentOS or RHEL + +``` +sudo yum install libgsl-devel +``` + +On macOS (source installations are not common on macOS, so this step is not usually necessary) + +``` +brew install gsl +``` + ## Usage -This package is intended to be used to explore the curated datasets from MicrobiomeDB.org, as well as (coming soon!) your own datasets. It comes pre-packaged with the same functions used to power the analysis tools from the website. Functions to facilitate easily transforming data between our custom objects, phyloseq objects, TreeSummarizedExperiment objects and .biom files are in development. +This package is intended to be used to explore the curated datasets from MicrobiomeDB.org, as well as your own datasets. It comes pre-packaged with the same functions used to power the analysis tools from the website. Functions to facilitate easily importing data from phyloseq objects, TreeSummarizedExperiment objects and .biom files (among others) are available. This package is paired with a dedicated data package called microbiomeData which includes a number of pre-built `MbioDataset` objects representing the curated data from the MicrobiomeDB.org website. You can see their names like: @@ -59,7 +96,7 @@ myAlphaDivDT <- getComputeResultWithMetadata(alphaDivResults, DiabImmune, 'host_ Or you can take these results as a `data.table` object and use them to build plots and things with ggplot2 or any other tool you like. -**Let us know if you build something interesting, encounter any bugs, or just wish something were easier to do. We'd love to hear from you! We consider each new issue filed to be a compliment.** +Let us know if you build something interesting, encounter any bugs, or just wish something were easier to do. We'd love to hear from you! We consider each new issue filed to be a compliment. ## Contributing Pull requests are welcome and should be made to the **dev** branch.