From fbffa9c5a2c20d932f767bb595a17032c3f01186 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 14:10:11 -0400
Subject: [PATCH 01/24] add data package as suggested

---
 DESCRIPTION | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 403fb34..0160186 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,7 +20,8 @@ Imports:
 Remotes:
     VEuPathDB/veupathUtils,
     microbiomeDB/corGraph,
-    microbiomeDB/microbiomeComputations
+    microbiomeDB/microbiomeComputations,
+    microbiomeDB/microbiomeData
 URL: https://github.com/microbiomeDB/MicrobiomeDB, https://microbiomedb.github.io/MicrobiomeDB/
 BugReports: https://github.com/microbiomeDB/MicrobiomeDB/issues
 Description: This package is intended to be used to explore the curated datasets from MicrobiomeDB.org, as well as your own datasets. It comes pre-packaged with the same functions used to power the analysis tools from the website. It also contains functions to facilitate easily transforming data between our custom objects, phyloseq objects, and .biom files that you can upload to the website.
@@ -30,5 +31,6 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
 Suggests: 
     testthat (>= 3.0.0),
-    S4Vectors
+    S4Vectors,
+    microbiomeData
 Config/testthat/edition: 3

From b449084794cafc45e46357f1f9a9b1d67252fdc4 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:05:50 -0400
Subject: [PATCH 02/24] examples for diff abund

---
 R/method-differentialAbundance.R     | 39 ++++++++++++++++++++++++++++
 man/Maaslin2.Rd                      | 12 +++++++++
 man/differentialAbundance-methods.Rd | 29 +++++++++++++++++++++
 3 files changed, 80 insertions(+)

diff --git a/R/method-differentialAbundance.R b/R/method-differentialAbundance.R
index e7fa33c..67bce58 100644
--- a/R/method-differentialAbundance.R
+++ b/R/method-differentialAbundance.R
@@ -72,7 +72,35 @@ buildBinaryComparator <- function(covariate, groupAValue, groupBValue) {
 #' about the data which may not be valid in other contexts. For better support of 
 #' longitudinal studies or metabolomic data, for example, please see our wrapper/ helper methods 
 #' for Maaslin2 (\code{MicrobiomeDB::Maaslin2}) and DESeq2 (\code{DESeqDataSetFromCollection}).
+#'
+#' @examples 
+#' ## a continuous variable
+#' diffAbundOutput <- MicrobiomeDB::differentialAbundance(
+#'        getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'), 
+#'        "breastfed_duration_days", 
+#'        groupA = function(x) {x<300},
+#'        groupB = function(x) {x>=300},
+#'        method='Maaslin2', 
+#'        verbose=TRUE
+#' )
+#' 
+#' ## a categorical variable with 3 values, one of which we exclude
+#' diffAbundOutput <- MicrobiomeDB::differentialAbundance(
+#'        getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'), 
+#'        "country", 
+#'        groupA = function(x) {x=="Russia"},
+#'        groupB = function(x) {x=="Finland"},
+#'        method='Maaslin2', 
+#'        verbose=FALSE
+#' )
 #' 
+#' ## a categorical variable with 2 values
+#' diffAbundOutput <- MicrobiomeDB::differentialAbundance(
+#'        getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'),
+#'        "delivery_mode",
+#'        method='Maaslin2', 
+#'        verbose=FALSE
+#' ) 
 #' @param data AbundanceData object
 #' @param covariate character vector giving the name of a metadata variable of interest. If this 
 #' variable has only two values, you do not need to provide functions for arguments `groupA` and `groupB`.
@@ -160,6 +188,17 @@ function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verb
 #' analysis methods (including support for multiple covariates and repeated measures)
 #' filtering, normalization, and transform options to customize analysis for your specific study.
 #' 
+#' @examples
+#' maaslinOutput <- MicrobiomeDB::Maaslin2(
+#'        data = getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'), 
+#'        output = tempfile("maaslin"),
+#'        #min_prevalence = 0,
+#'        fixed_effects = 'delivery_mode',
+#'        analysis_method = "LM", # default LM
+#'        normalization = "TSS", # default TSS
+#'        transform = "LOG", # default LOG
+#'        plot_heatmap = F,
+#'        plot_scatter = F)
 #' @param data a CollectionWithMetadata
 #' @param verbose boolean indicating if timed logging is desired
 #' @param ... additional arguments to pass to Maaslin2::Maaslin2
diff --git a/man/Maaslin2.Rd b/man/Maaslin2.Rd
index f122513..afbed7d 100644
--- a/man/Maaslin2.Rd
+++ b/man/Maaslin2.Rd
@@ -23,3 +23,15 @@ epidemiological studies. The software includes multiple
 analysis methods (including support for multiple covariates and repeated measures)
 filtering, normalization, and transform options to customize analysis for your specific study.
 }
+\examples{
+maaslinOutput <- MicrobiomeDB::Maaslin2(
+       data = getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'), 
+       output = tempfile("maaslin"),
+       #min_prevalence = 0,
+       fixed_effects = 'delivery_mode',
+       analysis_method = "LM", # default LM
+       normalization = "TSS", # default TSS
+       transform = "LOG", # default LOG
+       plot_heatmap = F,
+       plot_scatter = F)
+}
diff --git a/man/differentialAbundance-methods.Rd b/man/differentialAbundance-methods.Rd
index f908f0d..0986348 100644
--- a/man/differentialAbundance-methods.Rd
+++ b/man/differentialAbundance-methods.Rd
@@ -74,3 +74,32 @@ about the data which may not be valid in other contexts. For better support of
 longitudinal studies or metabolomic data, for example, please see our wrapper/ helper methods
 for Maaslin2 (\code{MicrobiomeDB::Maaslin2}) and DESeq2 (\code{DESeqDataSetFromCollection}).
 }
+\examples{
+## a continuous variable
+diffAbundOutput <- MicrobiomeDB::differentialAbundance(
+       getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'), 
+       "breastfed_duration_days", 
+       groupA = function(x) {x<300},
+       groupB = function(x) {x>=300},
+       method='Maaslin2', 
+       verbose=TRUE
+)
+
+## a categorical variable with 3 values, one of which we exclude
+diffAbundOutput <- MicrobiomeDB::differentialAbundance(
+       getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'), 
+       "country", 
+       groupA = function(x) {x=="Russia"},
+       groupB = function(x) {x=="Finland"},
+       method='Maaslin2', 
+       verbose=FALSE
+)
+
+## a categorical variable with 2 values
+diffAbundOutput <- MicrobiomeDB::differentialAbundance(
+       getCollection(microbiomeData::DiabImmune, '16S (V4) Genus'),
+       "delivery_mode",
+       method='Maaslin2', 
+       verbose=FALSE
+) 
+}

From 788272bedc771b026c3f2ba8d58284afc813e9e0 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:17:54 -0400
Subject: [PATCH 03/24] update collection names in tests

---
 tests/testthat/test-GetComputeResult.R      | 8 ++++----
 tests/testthat/test-differentialAbundance.R | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/testthat/test-GetComputeResult.R b/tests/testthat/test-GetComputeResult.R
index 3761528..5ebb643 100644
--- a/tests/testthat/test-GetComputeResult.R
+++ b/tests/testthat/test-GetComputeResult.R
@@ -7,7 +7,7 @@ test_that("we can get compute results in different formats", {
     ontologyFile <- test_path('testdata','DiabImmune/DiabImmune_OntologyMetadata.txt')
     mbioDataset <- MbioDataset(list(dataFile1, dataFile2), list(metadataFile2, metadataFile1, metadataFile3), ontologyFile)
 
-    genus <- getCollection(mbioDataset, "16S Genus", continuousMetadataOnly = TRUE)
+    genus <- getCollection(mbioDataset, "16S (V4) Genus", continuousMetadataOnly = TRUE)
 
     # make sure metadata dont contain IRIs
     expect_equal(all(grepl('[',names(genus@sampleMetadata@data),fixed=T)), FALSE)
@@ -35,10 +35,10 @@ test_that("we can get compute results in different formats", {
                             )
                         )
     )
-    diffAbundOutput <- microbiomeComputations::differentialAbundance(getCollection(mbioDataset, "16S Genus"), comparatorVariable, method='Maaslin2', verbose=FALSE)
+    diffAbundOutput <- microbiomeComputations::differentialAbundance(getCollection(mbioDataset, "16S (V4) Genus"), comparatorVariable, method='Maaslin2', verbose=FALSE)
     expect_equal(inherits(diffAbundOutput, "ComputeResult"), TRUE)
 
-    correlationOutput <- MicrobiomeDB::selfCorrelation(getCollection(mbioDataset, "16S Genus"), method='spearman', verbose=FALSE)
+    correlationOutput <- MicrobiomeDB::selfCorrelation(getCollection(mbioDataset, "16S (V4) Genus"), method='spearman', verbose=FALSE)
     correlationDT <- getComputeResult(correlationOutput, "data.table")
     expect_equal(inherits(correlationDT, "data.table"), TRUE)
     expect_equal(all(c('data1', 'data2', 'correlationCoef', 'pValue') %in% names(correlationDT)), TRUE)
@@ -60,7 +60,7 @@ test_that("we can get compute results in different formats", {
     expect_equal(inherits(correlationUnipartiteWidget, "unipartitenetwork"), TRUE)
 
     # make sure getComputeResultWithMetadata works
-    alphaDivOutput <- MicrobiomeDB::alphaDiv(getCollection(mbioDataset, "16S Genus"), method='shannon', verbose=FALSE)
+    alphaDivOutput <- MicrobiomeDB::alphaDiv(getCollection(mbioDataset, "16S (V4) Genus"), method='shannon', verbose=FALSE)
     expect_equal(inherits(alphaDivOutput, "ComputeResult"), TRUE)
     alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, mbioDataset, metadataVariables = c('country', 'delivery_mode'))
     expect_equal(inherits(alphaDivDT, "data.table"), TRUE)
diff --git a/tests/testthat/test-differentialAbundance.R b/tests/testthat/test-differentialAbundance.R
index 6747b32..21e16f1 100644
--- a/tests/testthat/test-differentialAbundance.R
+++ b/tests/testthat/test-differentialAbundance.R
@@ -7,7 +7,7 @@ metadataFile3 <- test_path('testdata','DiabImmune/DiabImmune_Sample.txt')
 ontologyFile <- test_path('testdata','DiabImmune/DiabImmune_OntologyMetadata.txt')
 mbioDataset <- MbioDataset(list(dataFile1, dataFile2), list(metadataFile2, metadataFile1, metadataFile3), ontologyFile)
 
-genus <- getCollection(mbioDataset, "16S Genus")
+genus <- getCollection(mbioDataset, "16S (V4) Genus")
 
 genusIdCols <- veupathUtils::getIdColumns(genus)
 counts <- round(microbiomeComputations::getAbundances(genus, includeIds=FALSE)*1000)

From a83016e5c6c9cb1fc383e47f92e1fa5a8e0d25e6 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:29:16 -0400
Subject: [PATCH 04/24] compute result examples

---
 R/methods-ComputeResult.R           | 25 +++++++++++++++++++++++++
 man/correlationNetwork.Rd           |  8 ++++++++
 man/getComputeResult.Rd             |  9 +++++++++
 man/getComputeResultWithMetadata.Rd |  8 ++++++++
 4 files changed, 50 insertions(+)

diff --git a/R/methods-ComputeResult.R b/R/methods-ComputeResult.R
index 4861e0c..15accec 100644
--- a/R/methods-ComputeResult.R
+++ b/R/methods-ComputeResult.R
@@ -2,6 +2,15 @@
 #' 
 #' Get the compute result from a Microbiome Dataset in a particular format.
 #' Some formats may not be supported for all compute results.
+#' 
+#' @examples 
+#' correlationOutput <- MicrobiomeDB::selfCorrelation(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method='spearman', 
+#'      verbose=FALSE
+#' )
+#' correlationDT <- getComputeResult(correlationOutput, "data.table")
+#' correlationIGraph <- getComputeResult(correlationOutput, "igraph")
 #' @param object A Microbiome Dataset
 #' @param format The format of the compute result. Currently only "data.table" and "igraph" are supported.
 #' @param ... additional arguments passed to getComputeResult method of the subclasses of ComputeResult
@@ -74,6 +83,14 @@ mergeComputeResultAndMetadata <- function(computeResult, dataset, metadataVariab
 #' Get Microbiome Dataset Compute Result With Metadata
 #' 
 #' Get the compute result from a Microbiome Dataset in a particular format with metadata.
+#' 
+#' @examples 
+#' alphaDivOutput <- MicrobiomeDB::alphaDiv(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method='shannon', 
+#'      verbose=FALSE
+#' )
+#' alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, mbioDataset, metadataVariables = c('country', 'delivery_mode'))
 #' @param object A Microbiome Dataset
 #' @param dataset The MbioDataset, AbundanceData or Collection object from which the compute result was obtained.
 #' @param format The format you want the compute result in. Currently only "data.table" is supported.
@@ -120,6 +137,14 @@ function(object, dataset = NULL, format = c("data.table"), metadataVariables = N
 #' Correlation Network Visualization
 #' 
 #' Visualize a correlation result as a network
+#' 
+#' @examples 
+#' correlationOutput <- MicrobiomeDB::correlation(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method='spearman', 
+#'      verbose=FALSE
+#' )
+#' correlationNetwork(correlationOutput) ## renders html widget
 #' @param object A ComputeResult or data.frame
 #' @param correlationCoefThreshold threshold to filter edges by correlation coefficient.
 #' Edges with correlation coefficients below this threshold will be removed. Default is .5
diff --git a/man/correlationNetwork.Rd b/man/correlationNetwork.Rd
index ad44a90..9372d62 100644
--- a/man/correlationNetwork.Rd
+++ b/man/correlationNetwork.Rd
@@ -42,3 +42,11 @@ Edges with correlation coefficients below this threshold will be removed. Defaul
 \description{
 Visualize a correlation result as a network
 }
+\examples{
+correlationOutput <- MicrobiomeDB::correlation(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method='spearman', 
+     verbose=FALSE
+)
+correlationNetwork(correlationOutput) ## renders html widget
+}
diff --git a/man/getComputeResult.Rd b/man/getComputeResult.Rd
index eb98f4a..5296250 100644
--- a/man/getComputeResult.Rd
+++ b/man/getComputeResult.Rd
@@ -39,3 +39,12 @@ The compute result in the specified format
 Get the compute result from a Microbiome Dataset in a particular format.
 Some formats may not be supported for all compute results.
 }
+\examples{
+correlationOutput <- MicrobiomeDB::selfCorrelation(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method='spearman', 
+     verbose=FALSE
+)
+correlationDT <- getComputeResult(correlationOutput, "data.table")
+correlationIGraph <- getComputeResult(correlationOutput, "igraph")
+}
diff --git a/man/getComputeResultWithMetadata.Rd b/man/getComputeResultWithMetadata.Rd
index be162ef..a815ac1 100644
--- a/man/getComputeResultWithMetadata.Rd
+++ b/man/getComputeResultWithMetadata.Rd
@@ -50,3 +50,11 @@ The compute result in the specified format
 \description{
 Get the compute result from a Microbiome Dataset in a particular format with metadata.
 }
+\examples{
+alphaDivOutput <- MicrobiomeDB::alphaDiv(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method='shannon', 
+     verbose=FALSE
+)
+alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, mbioDataset, metadataVariables = c('country', 'delivery_mode'))
+}

From ab7bc79297abc4f8f959e2937dca0ae12113b179 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:32:19 -0400
Subject: [PATCH 05/24] import methods

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0160186..fe6e8bd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,6 +14,7 @@ Imports:
     igraph,
     veupathUtils,
     Maaslin2,
+    methods,
     microbiomeComputations,
     phyloseq,
     purrr

From df420969d3a7d559baa5b5c0a92460199d137199 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:39:41 -0400
Subject: [PATCH 06/24] examples for mbiodataset

---
 R/methods-MbioDataset.R                       | 19 +++++++++++++++++++
 man/getCollection.Rd                          |  6 ++++++
 man/getCollectionNames-MbioDataset-method.Rd  |  3 +++
 ...etadataVariableNames-MbioDataset-method.Rd |  3 +++
 man/getSampleMetadata-MbioDataset-method.Rd   |  4 ++++
 man/updateCollectionName.Rd                   |  4 ++++
 6 files changed, 39 insertions(+)

diff --git a/R/methods-MbioDataset.R b/R/methods-MbioDataset.R
index 63a7650..f782cfd 100644
--- a/R/methods-MbioDataset.R
+++ b/R/methods-MbioDataset.R
@@ -2,6 +2,9 @@ collectionNamesGeneric <- getGeneric("getCollectionNames", "veupathUtils")
 #' Get Names of Collections
 #' 
 #' Get the names of the collections in a MbioDataset object
+#' 
+#' @examples 
+#' getCollectionNames(microbiomeData::DiabImmune)
 #' @param object An MbioDataset
 #' @return A character vector of collection names
 #' @export
@@ -12,6 +15,9 @@ metadataVarNamesGeneric <- getGeneric("getMetadataVariableNames", "veupathUtils"
 #' Get Variable Names of Metadata
 #' 
 #' Get the names of the metadata variables in an MbioDataset.
+#' 
+#' @examples 
+#' getMetadataVariableNames(microbiomeData::DiabImmune)
 #' @param object An MbioDataset
 #' @return a character vector of metadata variable names
 #' @export
@@ -23,6 +29,9 @@ sampleMetadataGeneric <- getGeneric("getSampleMetadata", "veupathUtils")
 #'
 #' Returns a data.table of sample metadata
 #' 
+#' @examples
+#' getSampleMetadata(microbiomeData::DiabImmune)
+#' getSampleMetadata(microbiomeData::DiabImmune, metadataVariables = c("age_years", "sex"))
 #' @param object MbioDataset
 #' @param asCopy boolean indicating whether to return the data as a copy or by reference
 #' @param includeIds boolean indicating whether we should include recordIdColumn and ancestorIdColumns
@@ -70,6 +79,10 @@ setMethod(metadataIdColsGeneric, "MbioDataset", function(object) veupathUtils::g
 #' Update Microbiome Dataset Collection Name
 #' 
 #' Update the name of a collection in the Microbiome Dataset.
+#' 
+#' @examples
+#' myCopyOfDiabImmune <- microbiomeData::DiabImmune
+#' myCopyOfDiabImmune <- updateCollectionName(myCopyOfDiabImmune, "16S (V4) Genus", "16S Genus")
 #' @param object A Microbiome Dataset
 #' @param oldName The name of the collection to update
 #' @param newName The new name of the collection
@@ -89,6 +102,12 @@ setMethod("updateCollectionName", "MbioDataset", function(object, oldName, newNa
 #' 
 #' Get a collection from the Microbiome Dataset. The collection will be returned
 #' as an AbundanceData, phyloseq, or Collection object.
+#' 
+#' @examples 
+#' genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus")
+#' genus_phyloseq <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "phyloseq")
+#' genus_continuous <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE) ## to pass to correlation method
+#' genus_collection <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "Collection") ## with no metadata
 #' @param object A Microbiome Dataset
 #' @param collectionName The name of the collection to return
 #' @param format The format of the collection to return. Currently supported options are "AbundanceData", "phyloseq" and "Collection".
diff --git a/man/getCollection.Rd b/man/getCollection.Rd
index ef622d8..fff77bd 100644
--- a/man/getCollection.Rd
+++ b/man/getCollection.Rd
@@ -36,3 +36,9 @@ An AbundanceData, phyloseq, or Collection object representing the collection and
 Get a collection from the Microbiome Dataset. The collection will be returned
 as an AbundanceData, phyloseq, or Collection object.
 }
+\examples{
+genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus")
+genus_phyloseq <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "phyloseq")
+genus_continuous <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE) ## to pass to correlation method
+genus_collection <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "Collection") ## with no metadata
+}
diff --git a/man/getCollectionNames-MbioDataset-method.Rd b/man/getCollectionNames-MbioDataset-method.Rd
index f028633..1de3f40 100644
--- a/man/getCollectionNames-MbioDataset-method.Rd
+++ b/man/getCollectionNames-MbioDataset-method.Rd
@@ -15,3 +15,6 @@ A character vector of collection names
 \description{
 Get the names of the collections in a MbioDataset object
 }
+\examples{
+getCollectionNames(microbiomeData::DiabImmune)
+}
diff --git a/man/getMetadataVariableNames-MbioDataset-method.Rd b/man/getMetadataVariableNames-MbioDataset-method.Rd
index 97051fe..630c72f 100644
--- a/man/getMetadataVariableNames-MbioDataset-method.Rd
+++ b/man/getMetadataVariableNames-MbioDataset-method.Rd
@@ -15,3 +15,6 @@ a character vector of metadata variable names
 \description{
 Get the names of the metadata variables in an MbioDataset.
 }
+\examples{
+getMetadataVariableNames(microbiomeData::DiabImmune)
+}
diff --git a/man/getSampleMetadata-MbioDataset-method.Rd b/man/getSampleMetadata-MbioDataset-method.Rd
index ddf3b88..d7817fe 100644
--- a/man/getSampleMetadata-MbioDataset-method.Rd
+++ b/man/getSampleMetadata-MbioDataset-method.Rd
@@ -26,3 +26,7 @@ data.table of sample metadata
 \description{
 Returns a data.table of sample metadata
 }
+\examples{
+getSampleMetadata(microbiomeData::DiabImmune)
+getSampleMetadata(microbiomeData::DiabImmune, metadataVariables = c("age_years", "sex"))
+}
diff --git a/man/updateCollectionName.Rd b/man/updateCollectionName.Rd
index d469f0d..275f110 100644
--- a/man/updateCollectionName.Rd
+++ b/man/updateCollectionName.Rd
@@ -23,3 +23,7 @@ A Microbiome Dataset with the updated collection name
 \description{
 Update the name of a collection in the Microbiome Dataset.
 }
+\examples{
+myCopyOfDiabImmune <- microbiomeData::DiabImmune
+myCopyOfDiabImmune <- updateCollectionName(myCopyOfDiabImmune, "16S (V4) Genus", "16S Genus")
+}

From a642559d37eebe77f57106b48a623b9aa85e92b5 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:44:09 -0400
Subject: [PATCH 07/24] add example for getMetadataVariableSummary

---
 NAMESPACE               | 2 ++
 R/methods-MbioDataset.R | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index d2c6bd4..e9eeebc 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,6 +16,7 @@ export(selfCorrelation)
 export(updateCollectionName)
 exportMethods(getCollectionNames)
 exportMethods(getMetadataVariableNames)
+exportMethods(getMetadataVariableSummary)
 exportMethods(getSampleMetadata)
 import(data.table)
 importFrom(DESeq2,DESeqDataSetFromMatrix)
@@ -47,6 +48,7 @@ importFrom(veupathUtils,getCollectionNames)
 importFrom(veupathUtils,getDataFromSource)
 importFrom(veupathUtils,getIdColumns)
 importFrom(veupathUtils,getMetadataVariableNames)
+importFrom(veupathUtils,getMetadataVariableSummary)
 importFrom(veupathUtils,getSampleMetadata)
 importFrom(veupathUtils,getSampleMetadataIdColumns)
 importFrom(veupathUtils,matchArg)
diff --git a/R/methods-MbioDataset.R b/R/methods-MbioDataset.R
index e22e380..c3edfea 100644
--- a/R/methods-MbioDataset.R
+++ b/R/methods-MbioDataset.R
@@ -28,6 +28,11 @@ metadataVarSummaryGeneric <- getGeneric("getMetadataVariableSummary", "veupathUt
 #' Get Summary of Metadata Variables
 #' 
 #' Get a summary of the requested metadata variable in an MbioDataset.
+#' 
+#' @examples 
+#' getMetadataVariableSummary(microbiomeData::DiabImmune, "age_years")
+#' getMetadataVariableSummary(microbiomeData::DiabImmune, "sex")
+#' getMetadataVariableSummary(microbiomeData::DiabImmune, "country")
 #' @param object An MbioDataset
 #' @param variable A character vector representing the name of the metadata variable to summarize
 #' @return a table summarizing the values of the requested metadata variable

From af0f8f1789828926d02a71f52a81b3d4f1e6a17a Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:44:22 -0400
Subject: [PATCH 08/24] update generated docs

---
 ...adataVariableSummary-MbioDataset-method.Rd | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 man/getMetadataVariableSummary-MbioDataset-method.Rd

diff --git a/man/getMetadataVariableSummary-MbioDataset-method.Rd b/man/getMetadataVariableSummary-MbioDataset-method.Rd
new file mode 100644
index 0000000..97ae3a2
--- /dev/null
+++ b/man/getMetadataVariableSummary-MbioDataset-method.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods-MbioDataset.R
+\name{getMetadataVariableSummary,MbioDataset-method}
+\alias{getMetadataVariableSummary,MbioDataset-method}
+\title{Get Summary of Metadata Variables}
+\usage{
+\S4method{getMetadataVariableSummary}{MbioDataset}(object, variable)
+}
+\arguments{
+\item{object}{An MbioDataset}
+
+\item{variable}{A character vector representing the name of the metadata variable to summarize}
+}
+\value{
+a table summarizing the values of the requested metadata variable
+}
+\description{
+Get a summary of the requested metadata variable in an MbioDataset.
+}
+\examples{
+getMetadataVariableSummary(microbiomeData::DiabImmune, "age_years")
+getMetadataVariableSummary(microbiomeData::DiabImmune, "sex")
+getMetadataVariableSummary(microbiomeData::DiabImmune, "country")
+}

From 4ff23aa2453b65bc93097efbb62997379e5e30ef Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 15:56:53 -0400
Subject: [PATCH 09/24] maintaining some docs in dup for user convenience/
 searchability and examples

---
 R/reexports-microbiomeComputations.R | 70 ++++++++++++++++++++++++++++
 man/alphaDiv-methods.Rd              | 28 +++++++++++
 man/betaDiv-methods.Rd               | 31 ++++++++++++
 man/correlation-methods.Rd           | 39 ++++++++++++++++
 man/rankedAbundance-methods.Rd       | 31 ++++++++++++
 man/reexports.Rd                     | 22 ---------
 man/selfCorrelation-methods.Rd       | 41 ++++++++++++++++
 7 files changed, 240 insertions(+), 22 deletions(-)
 create mode 100644 man/alphaDiv-methods.Rd
 create mode 100644 man/betaDiv-methods.Rd
 create mode 100644 man/correlation-methods.Rd
 create mode 100644 man/rankedAbundance-methods.Rd
 delete mode 100644 man/reexports.Rd
 create mode 100644 man/selfCorrelation-methods.Rd

diff --git a/R/reexports-microbiomeComputations.R b/R/reexports-microbiomeComputations.R
index 9912b9c..29c94b8 100644
--- a/R/reexports-microbiomeComputations.R
+++ b/R/reexports-microbiomeComputations.R
@@ -1,20 +1,90 @@
+#' Ranked abundance
+#'
+#' This function returns abundances, ranked by a selected ranking function
+#' 
+#' @examples
+#' rankedAbundOutput <- rankedAbundance(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "median")
+#' @param data AbundanceData object
+#' @param method string defining the ranking strategy by which to order the taxa. Accepted values are 'median','max','q3',and 'variance'. Note that taxa that return a value of 0 for a given method will not be included in the results.
+#' @param cutoff integer indicating the maximium number of taxa to be kept after ranking.
+#' @param verbose boolean indicating if timed logging is desired
+#' @return ComputeResult object
 #' @importFrom microbiomeComputations rankedAbundance
 #' @export
+#' @rdname rankedAbundance-methods
 microbiomeComputations::rankedAbundance
 
+#' Alpha diversity
+#'
+#' This function returns alpha diversity values for each sample.
+#' 
+#' @examples
+#' alphaDivOutput <- alphaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "shannon")
+#' @param data AbundanceData object
+#' @param method string defining the the alpha diversity method. Accepted values are 'shannon','simpson', and 'evenness'
+#' @param verbose boolean indicating if timed logging is desired
+#' @return ComputeResult object
 #' @importFrom microbiomeComputations alphaDiv
 #' @export
+#' @rdname alphaDiv-methods
 microbiomeComputations::alphaDiv
 
+#' Beta diversity
+#'
+#' This function returns pcoa coordinates calculated from the beta diversity dissimilarity matrix.
+#' 
+#' @examples 
+#' betaDivOutput <- betaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "bray", k = 2)
+#' @param data AbundanceData object
+#' @param method string defining the the beta diversity dissimilarity method. Accepted values are 'bray','jaccard', and 'jsd'
+#' @param k integer determining the number of pcoa axes to return
+#' @param verbose boolean indicating if timed logging is desired
+#' @return ComputeResult object
 #' @importFrom microbiomeComputations betaDiv
 #' @export
+#' @rdname betaDiv-methods
 microbiomeComputations::betaDiv
 
+#' Correlation
+#'
+#' This function returns correlation coefficients for variables in one dataset against variables in a second dataset
+#' 
+#' @examples
+#' correlationDT <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'data.table')
+#' correlationOutput <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'ComputeResult')
+#' alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+#' @param data1 first dataset. A data.table
+#' @param data2 second dataset. A data.table
+#' @param method string defining the type of correlation to run. 
+#' The currently supported values are specific to the class of data1 and data2.
+#' @param format string defining the desired format of the result. 
+#' The currently supported values are 'data.table' and 'ComputeResult'.
+#' @param verbose boolean indicating if timed logging is desired
+#' @return data.frame with correlation coefficients or a ComputeResult object
 #' @importFrom veupathUtils correlation
 #' @export
+#' @rdname correlation-methods
 veupathUtils::correlation
 
+#' Self Correlation
+#'
+#' This function returns correlation coefficients for variables in one AbundanceData object against itself. It generally serves as a 
+#' convenience wrapper around veupathUtils::correlation, with the exception that it additionally supports sparcc.
+#' 
+#' @examples
+#' correlationDT <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'data.table')
+#' correlationOutput <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'ComputeResult')
+#' alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+#' @param data An AbundanceData object
+#' @param method string defining the type of correlation to run. The currently supported values are 'spearman','pearson' and 'sparcc'
+#' @param format string defining the desired format of the result. The currently supported values are 'data.table' and 'ComputeResult'.
+#' @param verbose boolean indicating if timed logging is desired
+#' @param proportionNonZeroThreshold numeric threshold to filter features by proportion of non-zero values across samples
+#' @param varianceThreshold numeric threshold to filter features by variance across samples
+#' @param stdDevThreshold numeric threshold to filter features by standard deviation across samples
+#' @return ComputeResult object
 #' @importFrom veupathUtils selfCorrelation
 #' @importFrom microbiomeComputations selfCorrelation
 #' @export
+#' @rdname selfCorrelation-methods
 veupathUtils::selfCorrelation
diff --git a/man/alphaDiv-methods.Rd b/man/alphaDiv-methods.Rd
new file mode 100644
index 0000000..aac6518
--- /dev/null
+++ b/man/alphaDiv-methods.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports-microbiomeComputations.R
+\name{alphaDiv}
+\alias{alphaDiv}
+\title{Alpha diversity}
+\usage{
+alphaDiv(
+  data,
+  method = c("shannon", "simpson", "evenness"),
+  verbose = c(TRUE, FALSE)
+)
+}
+\arguments{
+\item{data}{AbundanceData object}
+
+\item{method}{string defining the the alpha diversity method. Accepted values are 'shannon','simpson', and 'evenness'}
+
+\item{verbose}{boolean indicating if timed logging is desired}
+}
+\value{
+ComputeResult object
+}
+\description{
+This function returns alpha diversity values for each sample.
+}
+\examples{
+alphaDivOutput <- alphaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "shannon")
+}
diff --git a/man/betaDiv-methods.Rd b/man/betaDiv-methods.Rd
new file mode 100644
index 0000000..7cd9852
--- /dev/null
+++ b/man/betaDiv-methods.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports-microbiomeComputations.R
+\name{betaDiv}
+\alias{betaDiv}
+\title{Beta diversity}
+\usage{
+betaDiv(
+  data,
+  method = c("bray", "jaccard", "jsd"),
+  k = 2,
+  verbose = c(TRUE, FALSE)
+)
+}
+\arguments{
+\item{data}{AbundanceData object}
+
+\item{method}{string defining the the beta diversity dissimilarity method. Accepted values are 'bray','jaccard', and 'jsd'}
+
+\item{k}{integer determining the number of pcoa axes to return}
+
+\item{verbose}{boolean indicating if timed logging is desired}
+}
+\value{
+ComputeResult object
+}
+\description{
+This function returns pcoa coordinates calculated from the beta diversity dissimilarity matrix.
+}
+\examples{
+betaDivOutput <- betaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "bray", k = 2)
+}
diff --git a/man/correlation-methods.Rd b/man/correlation-methods.Rd
new file mode 100644
index 0000000..dd9b357
--- /dev/null
+++ b/man/correlation-methods.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports-microbiomeComputations.R
+\name{correlation}
+\alias{correlation}
+\title{Correlation}
+\usage{
+correlation(
+  data1,
+  data2,
+  method,
+  format = c("ComputeResult", "data.table"),
+  verbose = c(TRUE, FALSE),
+  ...
+)
+}
+\arguments{
+\item{data1}{first dataset. A data.table}
+
+\item{data2}{second dataset. A data.table}
+
+\item{method}{string defining the type of correlation to run.
+The currently supported values are specific to the class of data1 and data2.}
+
+\item{format}{string defining the desired format of the result.
+The currently supported values are 'data.table' and 'ComputeResult'.}
+
+\item{verbose}{boolean indicating if timed logging is desired}
+}
+\value{
+data.frame with correlation coefficients or a ComputeResult object
+}
+\description{
+This function returns correlation coefficients for variables in one dataset against variables in a second dataset
+}
+\examples{
+correlationDT <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'data.table')
+correlationOutput <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'ComputeResult')
+alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+}
diff --git a/man/rankedAbundance-methods.Rd b/man/rankedAbundance-methods.Rd
new file mode 100644
index 0000000..6e62df3
--- /dev/null
+++ b/man/rankedAbundance-methods.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports-microbiomeComputations.R
+\name{rankedAbundance}
+\alias{rankedAbundance}
+\title{Ranked abundance}
+\usage{
+rankedAbundance(
+  data,
+  method = c("median", "max", "q3", "variance"),
+  cutoff = 10,
+  verbose = c(TRUE, FALSE)
+)
+}
+\arguments{
+\item{data}{AbundanceData object}
+
+\item{method}{string defining the ranking strategy by which to order the taxa. Accepted values are 'median','max','q3',and 'variance'. Note that taxa that return a value of 0 for a given method will not be included in the results.}
+
+\item{cutoff}{integer indicating the maximium number of taxa to be kept after ranking.}
+
+\item{verbose}{boolean indicating if timed logging is desired}
+}
+\value{
+ComputeResult object
+}
+\description{
+This function returns abundances, ranked by a selected ranking function
+}
+\examples{
+rankedAbundOutput <- rankedAbundance(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "median")
+}
diff --git a/man/reexports.Rd b/man/reexports.Rd
deleted file mode 100644
index 7fcf2ce..0000000
--- a/man/reexports.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/computes.R
-\docType{import}
-\name{reexports}
-\alias{reexports}
-\alias{rankedAbundance}
-\alias{alphaDiv}
-\alias{betaDiv}
-\alias{correlation}
-\alias{selfCorrelation}
-\title{Objects exported from other packages}
-\keyword{internal}
-\description{
-These objects are imported from other packages. Follow the links
-below to see their documentation.
-
-\describe{
-  \item{microbiomeComputations}{\code{\link[microbiomeComputations:alphaDiv-methods]{alphaDiv}}, \code{\link[microbiomeComputations:betaDiv-methods]{betaDiv}}, \code{\link[microbiomeComputations:rankedAbundance-methods]{rankedAbundance}}}
-
-  \item{veupathUtils}{\code{\link[veupathUtils]{correlation}}, \code{\link[veupathUtils]{selfCorrelation}}}
-}}
-
diff --git a/man/selfCorrelation-methods.Rd b/man/selfCorrelation-methods.Rd
new file mode 100644
index 0000000..4a9a32d
--- /dev/null
+++ b/man/selfCorrelation-methods.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports-microbiomeComputations.R
+\name{selfCorrelation}
+\alias{selfCorrelation}
+\title{Self Correlation}
+\usage{
+selfCorrelation(
+  data,
+  method = c("spearman", "pearson", "sparcc"),
+  format = c("ComputeResult", "data.table"),
+  verbose = c(TRUE, FALSE),
+  ...
+)
+}
+\arguments{
+\item{data}{An AbundanceData object}
+
+\item{method}{string defining the type of correlation to run. The currently supported values are 'spearman','pearson' and 'sparcc'}
+
+\item{format}{string defining the desired format of the result. The currently supported values are 'data.table' and 'ComputeResult'.}
+
+\item{verbose}{boolean indicating if timed logging is desired}
+
+\item{proportionNonZeroThreshold}{numeric threshold to filter features by proportion of non-zero values across samples}
+
+\item{varianceThreshold}{numeric threshold to filter features by variance across samples}
+
+\item{stdDevThreshold}{numeric threshold to filter features by standard deviation across samples}
+}
+\value{
+ComputeResult object
+}
+\description{
+This function returns correlation coefficients for variables in one AbundanceData object against itself. It generally serves as a
+convenience wrapper around veupathUtils::correlation, with the exception that it additionally supports sparcc.
+}
+\examples{
+correlationDT <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'data.table')
+correlationOutput <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'ComputeResult')
+alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+}

From c40bba3429a1b02ddad06a37fd7e10c4b9d7d352 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 22:36:32 -0400
Subject: [PATCH 10/24] an attempt at an alpha div vignette

---
 .gitignore             |   1 +
 DESCRIPTION            |   6 ++-
 vignettes/.gitignore   |   2 +
 vignettes/alphadiv.Rmd | 100 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 vignettes/.gitignore
 create mode 100644 vignettes/alphadiv.Rmd

diff --git a/.gitignore b/.gitignore
index 1e236e1..6c949a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,3 +48,4 @@ po/*~
 # RStudio Connect folder
 rsconnect/
 docs
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
index fe6e8bd..5f58df2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -33,5 +33,9 @@ RoxygenNote: 7.3.1
 Suggests: 
     testthat (>= 3.0.0),
     S4Vectors,
-    microbiomeData
+    microbiomeData,
+    knitr,
+    rmarkdown,
+    ggplot2
 Config/testthat/edition: 3
+VignetteBuilder: knitr
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 0000000..097b241
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/alphadiv.Rmd b/vignettes/alphadiv.Rmd
new file mode 100644
index 0000000..00f0266
--- /dev/null
+++ b/vignettes/alphadiv.Rmd
@@ -0,0 +1,100 @@
+---
+title: "Alpha Diversity"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Alpha Diversity}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(MicrobiomeDB)
+```
+
+## What is Alpha Diversity?
+
+Alpha diversity measures the diversity of microbial taxa within a single sample or community. It takes into account both the number of different taxa (richness) and their distribution (evenness). Understanding alpha diversity provides insights into the complexity and structure of microbial communities at a local level.
+
+##Why Care About Alpha Diversity?
+
+Researchers are interested in alpha diversity for several reasons:
+
+**Community Comparisons**: Compare the diversity of microbial communities across different samples or conditions.
+
+**Health Assessments**: Assess the health or stability of microbial communities within specific environments or host systems.
+
+**Ecological Understanding**: Gain insights into the ecological dynamics of microbial communities at a local scale.
+
+## How is Alpha Diversity Calculated?
+
+This package offers three diversity indices for calculation: Shannon, Simpson and Evenness.
+
+### Shannon Diversity Index
+
+The Shannon diversity index measures the entropy or uncertainty in predicting the identity of a randomly chosen taxon within a sample.
+
+It can be calculated as follows:
+
+```{r}
+## first lets find some interesting data
+microbiomeData::getCuratedDatasetNames()
+getCollectionNames(microbiomeData::HMP_WGS)
+
+## grab a collection we like
+genus <- getCollection(microbiomeData::HMP_WGS, 'WGS Genus')
+
+## get an alpha diversity ComputeResult
+alphaDivOutput <- alphaDiversity(genus, method = 'shannon')
+```
+
+### Simpson Diversity Index
+
+The Simpson diversity index measures the probability that two individuals randomly selected from the sample will belong to different taxa.
+
+It can be calculated as follows:
+
+```{r}
+## get an alpha diversity ComputeResult
+genus <- getCollection(microbiomeData::HMP_WGS, 'WGS Genus')
+alphaDivOutput <- alphaDiversity(genus, method = 'simpson')
+```
+
+### Species Evenness
+
+Species evenness describes the distribution of abundances across the species in a sample.
+Species evenness is highest when all species in a sample have the same abundance and approaches zero as relative abundances vary.
+
+```{r}
+## get an alpha diversity ComputeResult
+genus <- getCollection(microbiomeData::HMP_WGS, 'WGS Genus')
+alphaDivOutput <- alphaDiversity(genus, method = 'evenness')
+```
+
+## Visualizing Alpha Diversity
+
+Alpha Diversity is frequently visualized as scatter and box plots. Creating these types of plots can be done like the following:
+
+```{r}
+## choose one or more metadata variables to integrate with the compute result
+alphaDiv_withMetadata <- getComputeResultWithMetadata(
+  alphaDivOutput,
+  microbiomeData::HMP_WGS,
+  metadataVariables = c('host_body_habitat')
+)
+
+## plot the compute result with integrated metadata
+ggplot(alphaDiv_withMetadata) +
+  aes(x=alphaDiversity, y=host_body_habitat, fill=host_body_habitat) +
+  geom_boxplot() +
+  labs(y= "Body site", x = "Alpha diversity (Shannon)",
+        title="Alpha diversity by body site",
+        caption=paste0("produced on ", Sys.time())) +
+  theme_bw()
+```

From 33e98737850205630edc7e2a17edd695aea16fb0 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Tue, 16 Apr 2024 22:54:57 -0400
Subject: [PATCH 11/24] draft beta div vignette

---
 vignettes/betadiv.Rmd | 141 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 vignettes/betadiv.Rmd

diff --git a/vignettes/betadiv.Rmd b/vignettes/betadiv.Rmd
new file mode 100644
index 0000000..0a5cbb6
--- /dev/null
+++ b/vignettes/betadiv.Rmd
@@ -0,0 +1,141 @@
+---
+title: "Beta Diversity"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Beta Diversity}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(MicrobiomeDB)
+```
+
+## What is Beta Diversity?
+
+Beta diversity measures the dissimilarity or diversity between different microbial communities. 
+In the context of microbiome studies, it quantifies how microbial compositions vary across samples. 
+Understanding beta diversity allows researchers to explore the unique features of each sample and identify 
+patterns in microbial community structure.
+
+## Why Care About Beta Diversity?
+
+Researchers are interested in beta diversity for several reasons:
+
+**Ecological Insights**: Beta diversity helps uncover the ecological differences between microbial communities 
+in different environments or conditions.
+
+**Disease Studies**: In medical research, beta diversity can highlight variations in microbial communities associated
+with health or disease states.
+
+**Community Dynamics**: Studying beta diversity provides information about how microbial communities change over 
+time or in response to specific factors.
+
+## How is Beta Diversity Calculated?
+
+Beta Diversity can be calculated by first producing a dissimilarity matrix for all samples and then applying a dimensional 
+reduction technique to the dissimilarity matrix.
+
+This package offers flexibility in calculating beta diversity by providing multiple dissimilarity matrix options:
+
+#### Bray-Curtis Dissimilarity
+
+The Bray-Curtis algorithm measures compositional dissimilarity based on both the presence and abundance of taxa. 
+It calculates the normalized absolute differences in taxon abundance between two samples, providing a metric that 
+ranges from 0 (complete similarity) to 1 (complete dissimilarity).
+
+```{r}
+## first lets find some interesting data
+microbiomeData::getCuratedDatasetNames()
+getCollectionNames(microbiomeData::HMP_WGS)
+
+## grab a collection we like
+HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, 'WGS Species')
+
+## get a betaDiv ComputeResult
+betaDiv <- betaDiv(HMP_WGS_species, method = "bray")
+```
+
+#### Jaccard Dissimilarity
+
+Measures dissimilarity based on the presence-absence of taxa. It quantifies the proportion of taxa that are
+not shared between two samples.
+
+```{r}
+HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, 'WGS Species')
+betaDiv <- betaDiv(HMP_WGS_species, method = "jaccard")
+```
+
+#### Jensen-Shannon Divergence (JSD)
+
+Captures dissimilarity considering both abundance and presence-absence information. It is a symmetric version 
+of the Kullback-Leibler Divergence, providing a measure of dissimilarity between probability distributions.
+
+```{r}
+HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, 'WGS Species')
+betaDiv <- betaDiv(HMP_WGS_species, method = "jsd")
+```
+
+
+### Principal Coordinate Analysis (PCoA):
+
+PCoA is a dimensional reduction technique applied to the dissimilarity matrix, providing a visual representation of the
+relationships between samples in a lower-dimensional space. It transforms the dissimilarity matrix into a set of orthogonal 
+axes (principal coordinates) that capture the maximum variance in the data. The PCoA plot allows researchers to visualize 
+the spatial arrangement of samples, aiding in the interpretation of beta diversity. The **MicrobiomeDB** package performs
+PCoA as part of the `betaDiv` method.
+
+## Interpreting PCoA Results
+
+The following code will produce a PCoA plot:
+
+```{r}
+## choose one or more metadata variables to integrate with the compute result
+betaDiv_withMetadata <- getComputeResultWithMetadata(
+  betaDiv, 
+  HMP_WGS, 
+  metadataVariables = c('host_body_habitat'))
+
+## plot beta diversity
+ggplot(betaDiv_withMetadata) +
+  aes(x=Axis1, y=Axis2, color=host_body_habitat) +
+  geom_point() +
+  labs(y= "Axis 2", x = "Axis 1",
+        title="Beta diversity by body site",
+        caption=paste0("produced on ", Sys.time())) +
+  theme_bw()
+```
+
+The PCoA plot visually represents the dissimilarity between samples. Each point on the plot corresponds to a sample, 
+and the position of the points reflects their relationships based on beta diversity. Here's how to interpret the PCoA plot:
+
+### Axes Representation:
+
+The axes (principal coordinates) on the PCoA plot represent the dimensions of maximum variance in the dissimilarity matrix.
+
+The distance between points on the plot reflects the dissimilarity between corresponding samples.
+
+Each axis explains a certain percentage of the total variation in the data.
+
+### Interpreting Axis Direction:
+
+The direction of the axes indicates the major patterns of dissimilarity in the data.
+
+Samples that cluster together on the plot are more similar to each other, while those farther apart are more dissimilar.
+
+### Percentage of Variance:
+
+Check the percentage of variance explained by each axis. Higher percentages indicate that the axis captures more information 
+about the dissimilarity between samples.
+
+### Biological Interpretation:
+
+Interpret the biological meaning of the sample clustering. Are there distinct groups or trends in the data?  
+Color the PCoA plot with another variable to learn more about these patterns!
\ No newline at end of file

From 1d1a659ec7c6652f00e96d8c67904695e2e579bf Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 09:24:49 -0400
Subject: [PATCH 12/24] draft correlation vignette

---
 vignettes/correlation.Rmd | 157 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100644 vignettes/correlation.Rmd

diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
new file mode 100644
index 0000000..6cd5451
--- /dev/null
+++ b/vignettes/correlation.Rmd
@@ -0,0 +1,157 @@
+---
+title: "Correlation"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Correlation}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(MicrobiomeDB)
+```
+
+## What is a Correlation Analysis?
+
+Correlations are useful for identifying relationships between variables. They are very helpful
+for identifying biomarkers and functional associations, among other things.
+
+Biomarkers in microbiome data are microbial taxa or features that exhibit a significant correlation
+with particular conditions, traits, or variables in sample metadata. Identifying these biomarkers
+allows researchers to uncover associations between the microbiome and external factors, such as age, 
+disease status, or environmental conditions. Biomarkers serve as indicators of specific biological 
+or clinical phenomena within microbial communities.
+
+Functional associations in microbiome data refer to relationships between taxonomic relative abundances 
+and abundances of specific pathways or other functional features. Understanding these associations 
+allows researchers to explore how the taxonomic composition of microbial communities influences 
+functional capabilities.
+
+## Why Care About Biomarker Discovery?
+
+Researchers are interested in biomarker discovery for several reasons:
+
+**Diagnostic Insights**: Biomarkers can serve as potential diagnostic indicators, helping identify 
+microbial patterns associated with specific conditions.
+
+**Predictive Modeling**: Understanding biomarker correlations enables predictive modeling of microbial 
+responses to external factors.
+
+**Biological Significance**: Biomarkers provide insights into the biological significance of microbial 
+community variations in response to different conditions.
+
+## Why Care About Functional Associations?
+
+**Biological Insights**: Explore how changes in taxonomic composition may impact the functional potential 
+of microbial communities.
+
+**Pathway-Level Analysis**: Understand how specific pathways or functional features correlate with 
+taxonomic abundance, providing pathway-level insights.
+
+**Predictive Modeling**: Assessing functional associations aids in predicting microbial functional 
+responses to environmental changes or perturbations.
+
+## How are Correlations Calculated?
+
+This app employs correlation analysis between microbial taxonomic abundances and sample metadata 
+or abundances of pathways or other functional data using the following approach:
+
+Users can choose either Spearman or Pearson correlation for the analysis. Both will produce
+a correlation coefficient and a p-value indicating statistical significance.
+
+### Spearman Correlation
+
+Use when the relationship between variables is monotonic but not necessarily linear. Suitable for 
+non-linear associations.
+
+```{r}
+## first lets find some interesting data
+microbiomeData::getCuratedDatasetNames()
+getCollectionNames(microbiomeData::HMP_WGS)
+
+## grab a collection of interest
+HMP_WGS_species <- getCollection(HMP_WGS, "WGS Species")
+
+## get a correlation ComputeResult
+## this is not necessarily to recommend spearman for metadata. 
+## it is simply exemplary. Always look at your data!
+species_vs_metadata <- correlation(HMP_WGS_species, method = 'spearman') 
+```
+
+### Pearson Correlation
+
+Use when the relationship between variables is linear. Suitable for assessing linear associations.
+
+```{r}
+## grab two collections of interest, in this case species level data and pathway abundance data
+HMP_WGS_species <- getCollection(HMP_WGS, "WGS Species")
+HMP_WGS_pathways <- getCollection(HMP_WGS, "WGS Metagenome enzyme pathway abundance data" )
+
+## get a correlation ComputeResult
+## this is not necessarily to recommend pearson for functional data.
+## it is simply exemplary. Always look at your data!
+pathway_vs_species <- correlation(HMP_WGS_species, HMP_WGS_pathways, method = 'pearson')
+```
+
+## Interpreting Results
+
+**Correlation Coefficients**: Assess the strength and direction of correlations. Positive coefficients 
+indicate positive correlations, while negative coefficients indicate negative correlations.
+
+**p-values and Adjusted p-values**: Identify biomarkers with statistically significant correlations, 
+considering adjustments for multiple testing.
+
+You can extract these metrics and sort and filter results by them:
+```{r}
+## you can extract network metrics 
+pathway_vs_species.metrics <- as_tibble(
+  getComputeResult(
+    pathway_vs_species,
+    correlationCoefThreshold = 0.5,
+    pValueThreshold = 0.05
+  )
+)
+
+## it's also easy to sort and filter these network metrics
+## begin by renaming columns
+colnames(pathway_vs_species.metrics) <- c('species', 'pathway', 'correlationCoef', 'pValue')
+pathway_vs_species.metrics %>%
+  filter(species == "Faecalibacterium prausnitzii") %>%
+  filter(correlationCoef > 0.5) %>%
+  arrange(desc(correlationCoef))
+```
+
+You can also visualize them with custom htmlwidgets:
+
+```{r}
+## now plot the network
+## filters can be applied based on correlation coefficient and p-value
+## renders an interactive htmlwidget
+correlationNetwork(
+  pathway_vs_species,
+  correlationCoefThreshold = 0.5,
+  pValueThreshold = 0.05,
+  bipartiteNetwork = TRUE
+)
+```
+
+Finally, for more advanced analysis of the network, you can extract it as an igraph object:
+
+```{r}
+## if you extract the network as an igraph object, you can get more detailed metrics
+pathway_vs_species.igraph <- getComputeesult(pathway_vs_species, format = 'igraph')
+degree <- degree(pathway_vs_species.igraph)
+edgeBT <- edge_betweenness(pathway_vs_species.igraph)
+pgRank <- page_rank(pathway_vs_species.igraph)
+layout <- layout_with_kk(pathway_vs_species.igraph)
+
+layout
+```
+

From 42443b88a4aab4a4f5caa4f13eb89004c145d14e Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 09:50:12 -0400
Subject: [PATCH 13/24] draft self correlation vignette

---
 vignettes/correlation.Rmd      |   7 +-
 vignettes/self-correlation.Rmd | 114 +++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+), 4 deletions(-)
 create mode 100644 vignettes/self-correlation.Rmd

diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index 6cd5451..6af790c 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -60,7 +60,7 @@ responses to environmental changes or perturbations.
 
 ## How are Correlations Calculated?
 
-This app employs correlation analysis between microbial taxonomic abundances and sample metadata 
+This package employs correlation analysis between microbial taxonomic abundances and sample metadata 
 or abundances of pathways or other functional data using the following approach:
 
 Users can choose either Spearman or Pearson correlation for the analysis. Both will produce
@@ -133,12 +133,11 @@ You can also visualize them with custom htmlwidgets:
 ```{r}
 ## now plot the network
 ## filters can be applied based on correlation coefficient and p-value
-## renders an interactive htmlwidget
+## renders an interactive htmlwidget, using a predetermined layout
 correlationNetwork(
   pathway_vs_species,
   correlationCoefThreshold = 0.5,
-  pValueThreshold = 0.05,
-  bipartiteNetwork = TRUE
+  pValueThreshold = 0.05
 )
 ```
 
diff --git a/vignettes/self-correlation.Rmd b/vignettes/self-correlation.Rmd
new file mode 100644
index 0000000..4618b5a
--- /dev/null
+++ b/vignettes/self-correlation.Rmd
@@ -0,0 +1,114 @@
+---
+title: "Self Correlation"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Self Correlation}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(MicrobiomeDB)
+```
+
+## What is a Correlation Analysis?
+
+Correlations are useful for identifying relationships between variables. When you correlate taxonomic
+abundance variables against themselves, it essentially produces a microbial network. Producing and
+analyzing these networks is complicated by the fact that microbiome data is inherently compositional.
+If not properly accounted for, that can result in a lot of spurious negative correlations. 
+
+## Why Care About Microbial Network Analysis?
+
+Scientists delve into microbial network analysis for a large number of reasons:
+
+**Unraveling Complex Interactions**: Microbial network analysis unveils intricate relationships between 
+microorganisms, shedding light on their interconnectedness within ecosystems.
+
+**Diagnostic Potential**: Identifying key nodes in microbial networks can unveil potential diagnostic 
+markers, offering a glimpse into microbial dynamics associated with various diseases or environmental 
+conditions.
+
+**Insights into Functionality**: Microbial network analysis elucidates functional roles within microbial 
+communities, offering valuable insights into their ecological and physiological significance.
+
+From unraveling ecological mysteries to informing medical diagnoses, microbial network analysis proves 
+indispensable in understanding the intricate world of microorganisms.
+
+## How are Correlations Calculated?
+
+This package employs SPARCC for correlation analysis between microbial taxa. SPARCC calculates correlations 
+that account for the compositional nature of microbiome data.
+
+```{r}
+## first lets find some interesting data
+microbiomeData::getCuratedDatasetNames()
+getCollectionNames(microbiomeData::HMP_WGS)
+
+## grab a collection of interest
+Bangladesh_genus <- getCollection(Bangladesh, "16S (V4) Genus")
+
+## get a self correlation ComputeResult
+## methods spearman and pearson are options here as well, for data known to not be compositional
+selfCorrelation_genus <- selfCorrelation(Bangladesh_genus, method='sparcc')
+```
+
+## Interpreting Results
+
+**Correlation Coefficients**: Assess the strength and direction of correlations. Positive coefficients 
+indicate positive correlations, while negative coefficients indicate negative correlations.
+
+**p-values and Adjusted p-values**: Identify biomarkers with statistically significant correlations, 
+considering adjustments for multiple testing.
+
+You can extract these metrics and sort and filter results by them:
+```{r}
+## you can extract network metrics 
+selfCorrelation_genus.metrics <- as_tibble(
+    getComputeResult(
+        selfCorrelation_genus,
+        correlationCoefThreshold = 0.5,
+        pValueThreshold = 0.05
+    )
+)
+
+## it's also easy to sort and filter these network metrics
+## begin by renaming columns
+colnames(selfCorrelation_genus.metrics) <- c('taxa1', 'taxa2', 'correlationCoef', 'pValue')
+selfCorrelation_genus.metrics %>%
+    filter(correlationCoef > 0.5) %>%
+    arrange(desc(correlationCoef))
+```
+
+You can also visualize them with custom htmlwidgets:
+
+```{r}
+## now plot the network
+## filters can be applied based on correlation coefficient and p-value
+## renders an interactive htmlwidget, using a predetermined layout
+correlationNetwork(
+    selfCorrelation_genus,
+    correlationCoefThreshold = 0.5,
+    pValueThreshold = 0.05
+)
+```
+
+Finally, for more advanced analysis of the network, you can extract it as an igraph object:
+
+```{r}
+## if you extract the network as an igraph object, you can get more detailed metrics
+selfCorrelation_genus.igraph <- getComputeesult(selfCorrelation_genus, format = 'igraph')
+degree <- degree(selfCorrelation_genus.igraph)
+edgeBT <- edge_betweenness(selfCorrelation_genus.igraph)
+pgRank <- page_rank(selfCorrelation_genus.igraph)
+layout <- layout_with_kk(selfCorrelation_genus.igraph)
+
+layout
+```
\ No newline at end of file

From f904c76fe944a74833e229d0f4c74842f503f102 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 10:09:17 -0400
Subject: [PATCH 14/24] draft ranked abund vignette

---
 vignettes/ranked-abundance.Rmd | 103 +++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 vignettes/ranked-abundance.Rmd

diff --git a/vignettes/ranked-abundance.Rmd b/vignettes/ranked-abundance.Rmd
new file mode 100644
index 0000000..64e995e
--- /dev/null
+++ b/vignettes/ranked-abundance.Rmd
@@ -0,0 +1,103 @@
+---
+title: "Ranked Relative Abundances"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Ranked Relative Abundances}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(MicrobiomeDB)
+```
+
+## What are Relative Abundances
+
+Relative abundances refer to the proportion or percentage of each taxonomic group or microbial species 
+within a given sample or ecosystem. It quantifies the contribution of each taxon to the overall microbial
+community, providing insights into the distribution and dominance of different organisms. Relative 
+abundances are typically determined through techniques like amplicon sequencing or metagenomic analysis, 
+allowing researchers to compare the prevalence of various taxa across different samples or conditions. 
+This information is crucial for understanding the structure, dynamics, and ecological roles of microbial 
+communities.
+
+## Why Compare or Find Taxa Based on Relative Abundances?
+
+Researchers are drawn to comparing or finding taxa based on relative abundances for a multitude of reasons:
+
+**Ecosystem Dynamics**: Assessing relative abundances allows researchers to unravel shifts in microbial 
+community structures over time or in response to environmental changes, offering insights into ecosystem 
+dynamics and stability.
+
+**Biological Significance**: Identifying taxa with significant shifts in relative abundances can unveil 
+their roles in ecosystem functions, providing valuable clues about their ecological significance and 
+potential impacts on ecosystem processes.
+
+**Disease Associations**: Comparing taxa based on relative abundances can reveal microbial signatures 
+associated with various diseases or health conditions, paving the way for potential biomarker discovery 
+and diagnostic insights.
+
+**Taxonomic Profiling**: Understanding the relative abundances of different taxa provides a comprehensive 
+snapshot of microbial community compositions, aiding in taxonomic profiling and classification efforts.
+
+From deciphering ecological shifts to uncovering disease markers, comparing or finding taxa based on 
+relative abundances offers a powerful lens through which researchers can explore the intricate world of 
+microbial communities.
+
+## How to Find Ranked Relative Abundances?
+
+This package offers a convenience function for finding taxa by ranking their relative abundances by some
+metric of interest. Available metrics are `mean`, `median`, `q3` and `variance`.
+
+```{r}
+## first lets find some interesting data
+microbiomeData::getCuratedDatasetNames()
+getCollectionNames(microbiomeData::HMP_WGS)
+
+## grab a collection of interest
+HMP_WGS_species <- getCollection(HMP_WGS, "WGS Species")
+
+## get a ranked abundance ComputeResult
+## top 10 taxa by mean relative abundance across all samples
+rankedAbund <- rankedAbundance(HMP_WGS_species, method = "mean", cutoff = 10) 
+## top 8 taxa by median relative abundance across all samples
+rankedAbund <- rankedAbundance(HMP_WGS_species, method = "median", cutoff = 8)
+## top 20 taxa by third quartile relative abundance across all samples
+rankedAbund <- rankedAbundance(HMP_WGS_species, method = "q3", cutoff = 20)
+## top 10 taxa by greatest variance in relative abundance across all samples
+rankedAbund <- rankedAbundance(HMP_WGS_species, method = "variance", cutoff = 10)
+```
+
+## Visualizing Ranked Relative Abundances
+
+Abundances are frequently visualized as scatter and box plots. Creating these types of plots can be done like the following:
+
+```{r}
+## combine ranked abundance result with sample metadata of interest
+rankedAbund_withMetadata <- getComputeResultWithMetadata(
+    rankedAbund, 
+    HMP_WGS, 
+    metadataVariables = c('host_body_habitat'))
+
+## pivot the dataframe to be able to plot it
+rankedAbund_withMetadata.pivot <- pivot_longer(rankedAbund_withMetadata, # dataframe to be pivoted
+                                    cols = 4:13, # column names to be stored as a SINGLE variable
+                                    names_to = "taxa", # name of that new variable (column)
+                                    values_to = "abundance") # name of new variable (column) storing all the values (data)
+
+## plot the compute result with integrated metadata
+ggplot(rankedAbund_withMetadata.pivot) +
+    aes(x=abundance, y=taxa, fill = factor(host_body_habitat)) +
+    geom_boxplot() +
+    labs(y= "Taxon", x = "Relative abundance",
+        title="Relative abudnace of top taxa",
+        caption=paste0("produced on ", Sys.time())) +
+    theme_bw()
+```

From aaa032c8ba862d0f7d23fdfe76313a146d039ffa Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 10:12:53 -0400
Subject: [PATCH 15/24] explicitly namespace datasets

---
 vignettes/correlation.Rmd      | 6 +++---
 vignettes/ranked-abundance.Rmd | 2 +-
 vignettes/self-correlation.Rmd | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index 6af790c..15fea55 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -77,7 +77,7 @@ microbiomeData::getCuratedDatasetNames()
 getCollectionNames(microbiomeData::HMP_WGS)
 
 ## grab a collection of interest
-HMP_WGS_species <- getCollection(HMP_WGS, "WGS Species")
+HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, "WGS Species")
 
 ## get a correlation ComputeResult
 ## this is not necessarily to recommend spearman for metadata. 
@@ -91,8 +91,8 @@ Use when the relationship between variables is linear. Suitable for assessing li
 
 ```{r}
 ## grab two collections of interest, in this case species level data and pathway abundance data
-HMP_WGS_species <- getCollection(HMP_WGS, "WGS Species")
-HMP_WGS_pathways <- getCollection(HMP_WGS, "WGS Metagenome enzyme pathway abundance data" )
+HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, "WGS Species")
+HMP_WGS_pathways <- getCollection(microbiomeData::HMP_WGS, "WGS Metagenome enzyme pathway abundance data" )
 
 ## get a correlation ComputeResult
 ## this is not necessarily to recommend pearson for functional data.
diff --git a/vignettes/ranked-abundance.Rmd b/vignettes/ranked-abundance.Rmd
index 64e995e..3f54801 100644
--- a/vignettes/ranked-abundance.Rmd
+++ b/vignettes/ranked-abundance.Rmd
@@ -62,7 +62,7 @@ microbiomeData::getCuratedDatasetNames()
 getCollectionNames(microbiomeData::HMP_WGS)
 
 ## grab a collection of interest
-HMP_WGS_species <- getCollection(HMP_WGS, "WGS Species")
+HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, "WGS Species")
 
 ## get a ranked abundance ComputeResult
 ## top 10 taxa by mean relative abundance across all samples
diff --git a/vignettes/self-correlation.Rmd b/vignettes/self-correlation.Rmd
index 4618b5a..0d5c9b8 100644
--- a/vignettes/self-correlation.Rmd
+++ b/vignettes/self-correlation.Rmd
@@ -50,10 +50,10 @@ that account for the compositional nature of microbiome data.
 ```{r}
 ## first lets find some interesting data
 microbiomeData::getCuratedDatasetNames()
-getCollectionNames(microbiomeData::HMP_WGS)
+getCollectionNames(microbiomeData::Bangladesh)
 
 ## grab a collection of interest
-Bangladesh_genus <- getCollection(Bangladesh, "16S (V4) Genus")
+Bangladesh_genus <- getCollection(microbiomeData::Bangladesh, "16S (V4) Genus")
 
 ## get a self correlation ComputeResult
 ## methods spearman and pearson are options here as well, for data known to not be compositional

From f1763f1add9be7d4b102592601223556325e1c51 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 10:55:16 -0400
Subject: [PATCH 16/24] fixing up vignettes

---
 DESCRIPTION                    | 2 +-
 vignettes/alphadiv.Rmd         | 6 +++---
 vignettes/betadiv.Rmd          | 2 +-
 vignettes/correlation.Rmd      | 3 ++-
 vignettes/ranked-abundance.Rmd | 7 ++++---
 vignettes/self-correlation.Rmd | 1 +
 6 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5f58df2..f8dacbb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -36,6 +36,6 @@ Suggests:
     microbiomeData,
     knitr,
     rmarkdown,
-    ggplot2
+    tidyverse
 Config/testthat/edition: 3
 VignetteBuilder: knitr
diff --git a/vignettes/alphadiv.Rmd b/vignettes/alphadiv.Rmd
index 00f0266..cc65fd0 100644
--- a/vignettes/alphadiv.Rmd
+++ b/vignettes/alphadiv.Rmd
@@ -51,7 +51,7 @@ getCollectionNames(microbiomeData::HMP_WGS)
 genus <- getCollection(microbiomeData::HMP_WGS, 'WGS Genus')
 
 ## get an alpha diversity ComputeResult
-alphaDivOutput <- alphaDiversity(genus, method = 'shannon')
+alphaDivOutput <- alphaDiv(genus, method = 'shannon')
 ```
 
 ### Simpson Diversity Index
@@ -63,7 +63,7 @@ It can be calculated as follows:
 ```{r}
 ## get an alpha diversity ComputeResult
 genus <- getCollection(microbiomeData::HMP_WGS, 'WGS Genus')
-alphaDivOutput <- alphaDiversity(genus, method = 'simpson')
+alphaDivOutput <- alphaDiv(genus, method = 'simpson')
 ```
 
 ### Species Evenness
@@ -74,7 +74,7 @@ Species evenness is highest when all species in a sample have the same abundance
 ```{r}
 ## get an alpha diversity ComputeResult
 genus <- getCollection(microbiomeData::HMP_WGS, 'WGS Genus')
-alphaDivOutput <- alphaDiversity(genus, method = 'evenness')
+alphaDivOutput <- alphaDiv(genus, method = 'evenness')
 ```
 
 ## Visualizing Alpha Diversity
diff --git a/vignettes/betadiv.Rmd b/vignettes/betadiv.Rmd
index 0a5cbb6..9ebf898 100644
--- a/vignettes/betadiv.Rmd
+++ b/vignettes/betadiv.Rmd
@@ -100,7 +100,7 @@ The following code will produce a PCoA plot:
 ## choose one or more metadata variables to integrate with the compute result
 betaDiv_withMetadata <- getComputeResultWithMetadata(
   betaDiv, 
-  HMP_WGS, 
+  microbiomeData::HMP_WGS, 
   metadataVariables = c('host_body_habitat'))
 
 ## plot beta diversity
diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index 15fea55..d1fe152 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -16,6 +16,7 @@ knitr::opts_chunk$set(
 
 ```{r setup}
 library(MicrobiomeDB)
+library(tidyverse)
 ```
 
 ## What is a Correlation Analysis?
@@ -77,7 +78,7 @@ microbiomeData::getCuratedDatasetNames()
 getCollectionNames(microbiomeData::HMP_WGS)
 
 ## grab a collection of interest
-HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, "WGS Species")
+HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, "WGS Species", continuousMetadataOnly = TRUE)
 
 ## get a correlation ComputeResult
 ## this is not necessarily to recommend spearman for metadata. 
diff --git a/vignettes/ranked-abundance.Rmd b/vignettes/ranked-abundance.Rmd
index 3f54801..d5cd968 100644
--- a/vignettes/ranked-abundance.Rmd
+++ b/vignettes/ranked-abundance.Rmd
@@ -16,6 +16,7 @@ knitr::opts_chunk$set(
 
 ```{r setup}
 library(MicrobiomeDB)
+library(tidyverse)
 ```
 
 ## What are Relative Abundances
@@ -54,7 +55,7 @@ microbial communities.
 ## How to Find Ranked Relative Abundances?
 
 This package offers a convenience function for finding taxa by ranking their relative abundances by some
-metric of interest. Available metrics are `mean`, `median`, `q3` and `variance`.
+metric of interest. Available metrics are `max`, `median`, `q3` and `variance`.
 
 ```{r}
 ## first lets find some interesting data
@@ -65,8 +66,8 @@ getCollectionNames(microbiomeData::HMP_WGS)
 HMP_WGS_species <- getCollection(microbiomeData::HMP_WGS, "WGS Species")
 
 ## get a ranked abundance ComputeResult
-## top 10 taxa by mean relative abundance across all samples
-rankedAbund <- rankedAbundance(HMP_WGS_species, method = "mean", cutoff = 10) 
+## top 10 taxa by max relative abundance across all samples
+rankedAbund <- rankedAbundance(HMP_WGS_species, method = "max", cutoff = 10) 
 ## top 8 taxa by median relative abundance across all samples
 rankedAbund <- rankedAbundance(HMP_WGS_species, method = "median", cutoff = 8)
 ## top 20 taxa by third quartile relative abundance across all samples
diff --git a/vignettes/self-correlation.Rmd b/vignettes/self-correlation.Rmd
index 0d5c9b8..6ad2261 100644
--- a/vignettes/self-correlation.Rmd
+++ b/vignettes/self-correlation.Rmd
@@ -16,6 +16,7 @@ knitr::opts_chunk$set(
 
 ```{r setup}
 library(MicrobiomeDB)
+library(tidyverse)
 ```
 
 ## What is a Correlation Analysis?

From 9236dfb4d4a80cf52eb01d3269ef7031795e19bb Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 11:14:48 -0400
Subject: [PATCH 17/24] fixing up vignettes some more

---
 vignettes/alphadiv.Rmd         | 2 +-
 vignettes/betadiv.Rmd          | 2 +-
 vignettes/correlation.Rmd      | 2 +-
 vignettes/ranked-abundance.Rmd | 4 ++--
 vignettes/self-correlation.Rmd | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/vignettes/alphadiv.Rmd b/vignettes/alphadiv.Rmd
index cc65fd0..305637d 100644
--- a/vignettes/alphadiv.Rmd
+++ b/vignettes/alphadiv.Rmd
@@ -90,7 +90,7 @@ alphaDiv_withMetadata <- getComputeResultWithMetadata(
 )
 
 ## plot the compute result with integrated metadata
-ggplot(alphaDiv_withMetadata) +
+ggplot2::ggplot(alphaDiv_withMetadata) +
   aes(x=alphaDiversity, y=host_body_habitat, fill=host_body_habitat) +
   geom_boxplot() +
   labs(y= "Body site", x = "Alpha diversity (Shannon)",
diff --git a/vignettes/betadiv.Rmd b/vignettes/betadiv.Rmd
index 9ebf898..0b89a0f 100644
--- a/vignettes/betadiv.Rmd
+++ b/vignettes/betadiv.Rmd
@@ -104,7 +104,7 @@ betaDiv_withMetadata <- getComputeResultWithMetadata(
   metadataVariables = c('host_body_habitat'))
 
 ## plot beta diversity
-ggplot(betaDiv_withMetadata) +
+ggplot2::ggplot(betaDiv_withMetadata) +
   aes(x=Axis1, y=Axis2, color=host_body_habitat) +
   geom_point() +
   labs(y= "Axis 2", x = "Axis 1",
diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index d1fe152..0a80df2 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -146,7 +146,7 @@ Finally, for more advanced analysis of the network, you can extract it as an igr
 
 ```{r}
 ## if you extract the network as an igraph object, you can get more detailed metrics
-pathway_vs_species.igraph <- getComputeesult(pathway_vs_species, format = 'igraph')
+pathway_vs_species.igraph <- getComputeResult(pathway_vs_species, format = 'igraph')
 degree <- degree(pathway_vs_species.igraph)
 edgeBT <- edge_betweenness(pathway_vs_species.igraph)
 pgRank <- page_rank(pathway_vs_species.igraph)
diff --git a/vignettes/ranked-abundance.Rmd b/vignettes/ranked-abundance.Rmd
index d5cd968..ac00506 100644
--- a/vignettes/ranked-abundance.Rmd
+++ b/vignettes/ranked-abundance.Rmd
@@ -84,7 +84,7 @@ Abundances are frequently visualized as scatter and box plots. Creating these ty
 ## combine ranked abundance result with sample metadata of interest
 rankedAbund_withMetadata <- getComputeResultWithMetadata(
     rankedAbund, 
-    HMP_WGS, 
+    microbiomeData::HMP_WGS, 
     metadataVariables = c('host_body_habitat'))
 
 ## pivot the dataframe to be able to plot it
@@ -94,7 +94,7 @@ rankedAbund_withMetadata.pivot <- pivot_longer(rankedAbund_withMetadata, # dataf
                                     values_to = "abundance") # name of new variable (column) storing all the values (data)
 
 ## plot the compute result with integrated metadata
-ggplot(rankedAbund_withMetadata.pivot) +
+ggplot2::ggplot(rankedAbund_withMetadata.pivot) +
     aes(x=abundance, y=taxa, fill = factor(host_body_habitat)) +
     geom_boxplot() +
     labs(y= "Taxon", x = "Relative abundance",
diff --git a/vignettes/self-correlation.Rmd b/vignettes/self-correlation.Rmd
index 6ad2261..a3ac67d 100644
--- a/vignettes/self-correlation.Rmd
+++ b/vignettes/self-correlation.Rmd
@@ -105,7 +105,7 @@ Finally, for more advanced analysis of the network, you can extract it as an igr
 
 ```{r}
 ## if you extract the network as an igraph object, you can get more detailed metrics
-selfCorrelation_genus.igraph <- getComputeesult(selfCorrelation_genus, format = 'igraph')
+selfCorrelation_genus.igraph <- getComputeResult(selfCorrelation_genus, format = 'igraph')
 degree <- degree(selfCorrelation_genus.igraph)
 edgeBT <- edge_betweenness(selfCorrelation_genus.igraph)
 pgRank <- page_rank(selfCorrelation_genus.igraph)

From 68f1599bd2b9c2d6405f19cb160620810e72d957 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 11:37:27 -0400
Subject: [PATCH 18/24] still cleaning up vignettes

---
 vignettes/alphadiv.Rmd         | 3 ++-
 vignettes/betadiv.Rmd          | 1 +
 vignettes/correlation.Rmd      | 1 +
 vignettes/self-correlation.Rmd | 1 +
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/vignettes/alphadiv.Rmd b/vignettes/alphadiv.Rmd
index 305637d..faafbd3 100644
--- a/vignettes/alphadiv.Rmd
+++ b/vignettes/alphadiv.Rmd
@@ -16,6 +16,7 @@ knitr::opts_chunk$set(
 
 ```{r setup}
 library(MicrobiomeDB)
+library(tidyverse)
 ```
 
 ## What is Alpha Diversity?
@@ -90,7 +91,7 @@ alphaDiv_withMetadata <- getComputeResultWithMetadata(
 )
 
 ## plot the compute result with integrated metadata
-ggplot2::ggplot(alphaDiv_withMetadata) +
+ggplot(alphaDiv_withMetadata) +
   aes(x=alphaDiversity, y=host_body_habitat, fill=host_body_habitat) +
   geom_boxplot() +
   labs(y= "Body site", x = "Alpha diversity (Shannon)",
diff --git a/vignettes/betadiv.Rmd b/vignettes/betadiv.Rmd
index 0b89a0f..d58404b 100644
--- a/vignettes/betadiv.Rmd
+++ b/vignettes/betadiv.Rmd
@@ -16,6 +16,7 @@ knitr::opts_chunk$set(
 
 ```{r setup}
 library(MicrobiomeDB)
+library(tidyverse)
 ```
 
 ## What is Beta Diversity?
diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index 0a80df2..26006d4 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -17,6 +17,7 @@ knitr::opts_chunk$set(
 ```{r setup}
 library(MicrobiomeDB)
 library(tidyverse)
+library(igraph)
 ```
 
 ## What is a Correlation Analysis?
diff --git a/vignettes/self-correlation.Rmd b/vignettes/self-correlation.Rmd
index a3ac67d..3861565 100644
--- a/vignettes/self-correlation.Rmd
+++ b/vignettes/self-correlation.Rmd
@@ -17,6 +17,7 @@ knitr::opts_chunk$set(
 ```{r setup}
 library(MicrobiomeDB)
 library(tidyverse)
+library(igraph)
 ```
 
 ## What is a Correlation Analysis?

From 2db42f6bfdecb4889635741d35f603b9f915b676 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 12:15:04 -0400
Subject: [PATCH 19/24] clean up docs

---
 R/method-differentialAbundance.R     | 4 ++--
 R/reexports-microbiomeComputations.R | 2 ++
 man/Maaslin2.Rd                      | 4 ++--
 man/correlation-methods.Rd           | 2 ++
 man/selfCorrelation-methods.Rd       | 2 ++
 5 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/R/method-differentialAbundance.R b/R/method-differentialAbundance.R
index 67bce58..5e52d07 100644
--- a/R/method-differentialAbundance.R
+++ b/R/method-differentialAbundance.R
@@ -197,8 +197,8 @@ function(data, covariate, groupA, groupB, method = c("Maaslin2", "DESeq2"), verb
 #'        analysis_method = "LM", # default LM
 #'        normalization = "TSS", # default TSS
 #'        transform = "LOG", # default LOG
-#'        plot_heatmap = F,
-#'        plot_scatter = F)
+#'        plot_heatmap = FALSE,
+#'        plot_scatter = FALSE)
 #' @param data a CollectionWithMetadata
 #' @param verbose boolean indicating if timed logging is desired
 #' @param ... additional arguments to pass to Maaslin2::Maaslin2
diff --git a/R/reexports-microbiomeComputations.R b/R/reexports-microbiomeComputations.R
index 29c94b8..13b41cd 100644
--- a/R/reexports-microbiomeComputations.R
+++ b/R/reexports-microbiomeComputations.R
@@ -60,6 +60,7 @@ microbiomeComputations::betaDiv
 #' @param format string defining the desired format of the result. 
 #' The currently supported values are 'data.table' and 'ComputeResult'.
 #' @param verbose boolean indicating if timed logging is desired
+#' @param ... additional parameters
 #' @return data.frame with correlation coefficients or a ComputeResult object
 #' @importFrom veupathUtils correlation
 #' @export
@@ -82,6 +83,7 @@ veupathUtils::correlation
 #' @param proportionNonZeroThreshold numeric threshold to filter features by proportion of non-zero values across samples
 #' @param varianceThreshold numeric threshold to filter features by variance across samples
 #' @param stdDevThreshold numeric threshold to filter features by standard deviation across samples
+#' @param ... additional parameters
 #' @return ComputeResult object
 #' @importFrom veupathUtils selfCorrelation
 #' @importFrom microbiomeComputations selfCorrelation
diff --git a/man/Maaslin2.Rd b/man/Maaslin2.Rd
index afbed7d..41576c7 100644
--- a/man/Maaslin2.Rd
+++ b/man/Maaslin2.Rd
@@ -32,6 +32,6 @@ maaslinOutput <- MicrobiomeDB::Maaslin2(
        analysis_method = "LM", # default LM
        normalization = "TSS", # default TSS
        transform = "LOG", # default LOG
-       plot_heatmap = F,
-       plot_scatter = F)
+       plot_heatmap = FALSE,
+       plot_scatter = FALSE)
 }
diff --git a/man/correlation-methods.Rd b/man/correlation-methods.Rd
index dd9b357..7a37efc 100644
--- a/man/correlation-methods.Rd
+++ b/man/correlation-methods.Rd
@@ -25,6 +25,8 @@ The currently supported values are specific to the class of data1 and data2.}
 The currently supported values are 'data.table' and 'ComputeResult'.}
 
 \item{verbose}{boolean indicating if timed logging is desired}
+
+\item{...}{additional parameters}
 }
 \value{
 data.frame with correlation coefficients or a ComputeResult object
diff --git a/man/selfCorrelation-methods.Rd b/man/selfCorrelation-methods.Rd
index 4a9a32d..2782de1 100644
--- a/man/selfCorrelation-methods.Rd
+++ b/man/selfCorrelation-methods.Rd
@@ -21,6 +21,8 @@ selfCorrelation(
 
 \item{verbose}{boolean indicating if timed logging is desired}
 
+\item{...}{additional parameters}
+
 \item{proportionNonZeroThreshold}{numeric threshold to filter features by proportion of non-zero values across samples}
 
 \item{varianceThreshold}{numeric threshold to filter features by variance across samples}

From 9955eab381b9c83027af0ffbcf73e2d2b1890269 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 12:38:23 -0400
Subject: [PATCH 20/24] clean up correlation docs

---
 R/reexports-microbiomeComputations.R | 8 +++-----
 man/correlation-methods.Rd           | 5 +++--
 man/selfCorrelation-methods.Rd       | 6 ------
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/R/reexports-microbiomeComputations.R b/R/reexports-microbiomeComputations.R
index 13b41cd..ec3e1bf 100644
--- a/R/reexports-microbiomeComputations.R
+++ b/R/reexports-microbiomeComputations.R
@@ -50,8 +50,9 @@ microbiomeComputations::betaDiv
 #' This function returns correlation coefficients for variables in one dataset against variables in a second dataset
 #' 
 #' @examples
-#' correlationDT <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'data.table')
-#' correlationOutput <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'ComputeResult')
+#' diabImmune_genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE)
+#' correlationDT <- correlation(diabImmune_genus, method = 'spearman', format = 'data.table')
+#' correlationOutput <- correlation(diabImmune_genus, method = 'spearman', format = 'ComputeResult')
 #' alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
 #' @param data1 first dataset. A data.table
 #' @param data2 second dataset. A data.table
@@ -80,9 +81,6 @@ veupathUtils::correlation
 #' @param method string defining the type of correlation to run. The currently supported values are 'spearman','pearson' and 'sparcc'
 #' @param format string defining the desired format of the result. The currently supported values are 'data.table' and 'ComputeResult'.
 #' @param verbose boolean indicating if timed logging is desired
-#' @param proportionNonZeroThreshold numeric threshold to filter features by proportion of non-zero values across samples
-#' @param varianceThreshold numeric threshold to filter features by variance across samples
-#' @param stdDevThreshold numeric threshold to filter features by standard deviation across samples
 #' @param ... additional parameters
 #' @return ComputeResult object
 #' @importFrom veupathUtils selfCorrelation
diff --git a/man/correlation-methods.Rd b/man/correlation-methods.Rd
index 7a37efc..41b360a 100644
--- a/man/correlation-methods.Rd
+++ b/man/correlation-methods.Rd
@@ -35,7 +35,8 @@ data.frame with correlation coefficients or a ComputeResult object
 This function returns correlation coefficients for variables in one dataset against variables in a second dataset
 }
 \examples{
-correlationDT <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'data.table')
-correlationOutput <- correlation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'spearman', format = 'ComputeResult')
+diabImmune_genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE)
+correlationDT <- correlation(diabImmune_genus, method = 'spearman', format = 'data.table')
+correlationOutput <- correlation(diabImmune_genus, method = 'spearman', format = 'ComputeResult')
 alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
 }
diff --git a/man/selfCorrelation-methods.Rd b/man/selfCorrelation-methods.Rd
index 2782de1..290b21c 100644
--- a/man/selfCorrelation-methods.Rd
+++ b/man/selfCorrelation-methods.Rd
@@ -22,12 +22,6 @@ selfCorrelation(
 \item{verbose}{boolean indicating if timed logging is desired}
 
 \item{...}{additional parameters}
-
-\item{proportionNonZeroThreshold}{numeric threshold to filter features by proportion of non-zero values across samples}
-
-\item{varianceThreshold}{numeric threshold to filter features by variance across samples}
-
-\item{stdDevThreshold}{numeric threshold to filter features by standard deviation across samples}
 }
 \value{
 ComputeResult object

From ba6a823d6a56792a76d45553206d1df4f553f3f1 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 12:56:20 -0400
Subject: [PATCH 21/24] fix correlationNetwork example

---
 R/methods-ComputeResult.R | 2 +-
 man/correlationNetwork.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/methods-ComputeResult.R b/R/methods-ComputeResult.R
index 15accec..2d8ae60 100644
--- a/R/methods-ComputeResult.R
+++ b/R/methods-ComputeResult.R
@@ -140,7 +140,7 @@ function(object, dataset = NULL, format = c("data.table"), metadataVariables = N
 #' 
 #' @examples 
 #' correlationOutput <- MicrobiomeDB::correlation(
-#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE), 
 #'      method='spearman', 
 #'      verbose=FALSE
 #' )
diff --git a/man/correlationNetwork.Rd b/man/correlationNetwork.Rd
index 9372d62..7ea6adf 100644
--- a/man/correlationNetwork.Rd
+++ b/man/correlationNetwork.Rd
@@ -44,7 +44,7 @@ Visualize a correlation result as a network
 }
 \examples{
 correlationOutput <- MicrobiomeDB::correlation(
-     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE), 
      method='spearman', 
      verbose=FALSE
 )

From 4711e451681a91a6556587171936aebb8a3230a4 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 14:23:28 -0400
Subject: [PATCH 22/24] fix getComputeResultWithMetadata example

---
 R/methods-ComputeResult.R           | 2 +-
 man/getComputeResultWithMetadata.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/methods-ComputeResult.R b/R/methods-ComputeResult.R
index 2d8ae60..46cbf52 100644
--- a/R/methods-ComputeResult.R
+++ b/R/methods-ComputeResult.R
@@ -90,7 +90,7 @@ mergeComputeResultAndMetadata <- function(computeResult, dataset, metadataVariab
 #'      method='shannon', 
 #'      verbose=FALSE
 #' )
-#' alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, mbioDataset, metadataVariables = c('country', 'delivery_mode'))
+#' alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, microbiomeData::DiabImmune, metadataVariables = c('country', 'delivery_mode'))
 #' @param object A Microbiome Dataset
 #' @param dataset The MbioDataset, AbundanceData or Collection object from which the compute result was obtained.
 #' @param format The format you want the compute result in. Currently only "data.table" is supported.
diff --git a/man/getComputeResultWithMetadata.Rd b/man/getComputeResultWithMetadata.Rd
index a815ac1..de6eb36 100644
--- a/man/getComputeResultWithMetadata.Rd
+++ b/man/getComputeResultWithMetadata.Rd
@@ -56,5 +56,5 @@ alphaDivOutput <- MicrobiomeDB::alphaDiv(
      method='shannon', 
      verbose=FALSE
 )
-alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, mbioDataset, metadataVariables = c('country', 'delivery_mode'))
+alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, microbiomeData::DiabImmune, metadataVariables = c('country', 'delivery_mode'))
 }

From 8df79a194aae0faaf2ede37f5ceb4f1f7be612d2 Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 14:46:54 -0400
Subject: [PATCH 23/24] fix more examples

---
 R/methods-MbioDataset.R                              | 4 ++--
 man/getMetadataVariableSummary-MbioDataset-method.Rd | 2 +-
 man/getSampleMetadata-MbioDataset-method.Rd          | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/methods-MbioDataset.R b/R/methods-MbioDataset.R
index c3edfea..90ecccb 100644
--- a/R/methods-MbioDataset.R
+++ b/R/methods-MbioDataset.R
@@ -30,7 +30,7 @@ metadataVarSummaryGeneric <- getGeneric("getMetadataVariableSummary", "veupathUt
 #' Get a summary of the requested metadata variable in an MbioDataset.
 #' 
 #' @examples 
-#' getMetadataVariableSummary(microbiomeData::DiabImmune, "age_years")
+#' getMetadataVariableSummary(microbiomeData::DiabImmune, "age_months")
 #' getMetadataVariableSummary(microbiomeData::DiabImmune, "sex")
 #' getMetadataVariableSummary(microbiomeData::DiabImmune, "country")
 #' @param object An MbioDataset
@@ -61,7 +61,7 @@ sampleMetadataGeneric <- getGeneric("getSampleMetadata", "veupathUtils")
 #' 
 #' @examples
 #' getSampleMetadata(microbiomeData::DiabImmune)
-#' getSampleMetadata(microbiomeData::DiabImmune, metadataVariables = c("age_years", "sex"))
+#' getSampleMetadata(microbiomeData::DiabImmune, metadataVariables = c("age_months", "sex"))
 #' @param object MbioDataset
 #' @param asCopy boolean indicating whether to return the data as a copy or by reference
 #' @param includeIds boolean indicating whether we should include recordIdColumn and ancestorIdColumns
diff --git a/man/getMetadataVariableSummary-MbioDataset-method.Rd b/man/getMetadataVariableSummary-MbioDataset-method.Rd
index 97ae3a2..293afc6 100644
--- a/man/getMetadataVariableSummary-MbioDataset-method.Rd
+++ b/man/getMetadataVariableSummary-MbioDataset-method.Rd
@@ -18,7 +18,7 @@ a table summarizing the values of the requested metadata variable
 Get a summary of the requested metadata variable in an MbioDataset.
 }
 \examples{
-getMetadataVariableSummary(microbiomeData::DiabImmune, "age_years")
+getMetadataVariableSummary(microbiomeData::DiabImmune, "age_months")
 getMetadataVariableSummary(microbiomeData::DiabImmune, "sex")
 getMetadataVariableSummary(microbiomeData::DiabImmune, "country")
 }
diff --git a/man/getSampleMetadata-MbioDataset-method.Rd b/man/getSampleMetadata-MbioDataset-method.Rd
index d7817fe..b3b137e 100644
--- a/man/getSampleMetadata-MbioDataset-method.Rd
+++ b/man/getSampleMetadata-MbioDataset-method.Rd
@@ -28,5 +28,5 @@ Returns a data.table of sample metadata
 }
 \examples{
 getSampleMetadata(microbiomeData::DiabImmune)
-getSampleMetadata(microbiomeData::DiabImmune, metadataVariables = c("age_years", "sex"))
+getSampleMetadata(microbiomeData::DiabImmune, metadataVariables = c("age_months", "sex"))
 }

From 1d69dd728e75423edce498412a51b71bdc90505f Mon Sep 17 00:00:00 2001
From: Danielle Callan <dcallan@upenn.edu>
Date: Wed, 17 Apr 2024 15:34:57 -0400
Subject: [PATCH 24/24] improve linting

---
 R/methods-ComputeResult.R            |  7 +++-
 R/methods-MbioDataset.R              | 30 +++++++++++---
 R/reexports-microbiomeComputations.R | 61 +++++++++++++++++++++++-----
 man/alphaDiv-methods.Rd              |  5 ++-
 man/betaDiv-methods.Rd               |  6 ++-
 man/correlation-methods.Rd           | 26 ++++++++++--
 man/getCollection.Rd                 | 28 +++++++++++--
 man/getComputeResultWithMetadata.Rd  |  7 +++-
 man/rankedAbundance-methods.Rd       |  5 ++-
 man/selfCorrelation-methods.Rd       | 19 +++++++--
 10 files changed, 163 insertions(+), 31 deletions(-)

diff --git a/R/methods-ComputeResult.R b/R/methods-ComputeResult.R
index 46cbf52..d9d3826 100644
--- a/R/methods-ComputeResult.R
+++ b/R/methods-ComputeResult.R
@@ -90,7 +90,12 @@ mergeComputeResultAndMetadata <- function(computeResult, dataset, metadataVariab
 #'      method='shannon', 
 #'      verbose=FALSE
 #' )
-#' alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, microbiomeData::DiabImmune, metadataVariables = c('country', 'delivery_mode'))
+#' 
+#' alphaDivDT <- getComputeResultWithMetadata(
+#'      alphaDivOutput, 
+#'      microbiomeData::DiabImmune, 
+#'      metadataVariables = c('country', 'delivery_mode')
+#' )
 #' @param object A Microbiome Dataset
 #' @param dataset The MbioDataset, AbundanceData or Collection object from which the compute result was obtained.
 #' @param format The format you want the compute result in. Currently only "data.table" is supported.
diff --git a/R/methods-MbioDataset.R b/R/methods-MbioDataset.R
index 90ecccb..6ec6611 100644
--- a/R/methods-MbioDataset.R
+++ b/R/methods-MbioDataset.R
@@ -133,11 +133,31 @@ setMethod("updateCollectionName", "MbioDataset", function(object, oldName, newNa
 #' Get a collection from the Microbiome Dataset. The collection will be returned
 #' as an AbundanceData, phyloseq, or Collection object.
 #' 
-#' @examples 
-#' genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus")
-#' genus_phyloseq <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "phyloseq")
-#' genus_continuous <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE) ## to pass to correlation method
-#' genus_collection <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "Collection") ## with no metadata
+#' @examples
+#' genus <- getCollection(
+#'      microbiomeData::DiabImmune, 
+#'      "16S (V4) Genus"
+#' )
+#' 
+#' genus_phyloseq <- getCollection(
+#'      microbiomeData::DiabImmune, 
+#'      "16S (V4) Genus", 
+#'      format = "phyloseq"
+#' )
+#' 
+#' ## to pass to correlation method, we want only continuous metadata
+#' genus_continuous <- getCollection(
+#'      microbiomeData::DiabImmune, 
+#'      "16S (V4) Genus", 
+#'      continuousMetadataOnly = TRUE
+#' ) 
+#' 
+#' ## with no metadata
+#' genus_collection <- getCollection(
+#'      microbiomeData::DiabImmune, 
+#'      "16S (V4) Genus", 
+#'      format = "Collection"
+#' )
 #' @param object A Microbiome Dataset
 #' @param collectionName The name of the collection to return
 #' @param format The format of the collection to return. Currently supported options are "AbundanceData", "phyloseq" and "Collection".
diff --git a/R/reexports-microbiomeComputations.R b/R/reexports-microbiomeComputations.R
index ec3e1bf..3ad2fab 100644
--- a/R/reexports-microbiomeComputations.R
+++ b/R/reexports-microbiomeComputations.R
@@ -3,7 +3,10 @@
 #' This function returns abundances, ranked by a selected ranking function
 #' 
 #' @examples
-#' rankedAbundOutput <- rankedAbundance(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "median")
+#' rankedAbundOutput <- rankedAbundance(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method = "median"
+#' )
 #' @param data AbundanceData object
 #' @param method string defining the ranking strategy by which to order the taxa. Accepted values are 'median','max','q3',and 'variance'. Note that taxa that return a value of 0 for a given method will not be included in the results.
 #' @param cutoff integer indicating the maximium number of taxa to be kept after ranking.
@@ -19,7 +22,10 @@ microbiomeComputations::rankedAbundance
 #' This function returns alpha diversity values for each sample.
 #' 
 #' @examples
-#' alphaDivOutput <- alphaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "shannon")
+#' alphaDivOutput <- alphaDiv(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method = "shannon"
+#' )
 #' @param data AbundanceData object
 #' @param method string defining the the alpha diversity method. Accepted values are 'shannon','simpson', and 'evenness'
 #' @param verbose boolean indicating if timed logging is desired
@@ -34,7 +40,11 @@ microbiomeComputations::alphaDiv
 #' This function returns pcoa coordinates calculated from the beta diversity dissimilarity matrix.
 #' 
 #' @examples 
-#' betaDivOutput <- betaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "bray", k = 2)
+#' betaDivOutput <- betaDiv(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method = "bray", 
+#'      k = 2
+#' )
 #' @param data AbundanceData object
 #' @param method string defining the the beta diversity dissimilarity method. Accepted values are 'bray','jaccard', and 'jsd'
 #' @param k integer determining the number of pcoa axes to return
@@ -50,10 +60,28 @@ microbiomeComputations::betaDiv
 #' This function returns correlation coefficients for variables in one dataset against variables in a second dataset
 #' 
 #' @examples
-#' diabImmune_genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE)
-#' correlationDT <- correlation(diabImmune_genus, method = 'spearman', format = 'data.table')
-#' correlationOutput <- correlation(diabImmune_genus, method = 'spearman', format = 'ComputeResult')
-#' alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+#' diabImmune_genus <- getCollection(
+#'      microbiomeData::DiabImmune, 
+#'      "16S (V4) Genus", 
+#'      continuousMetadataOnly = TRUE
+#' )
+#' 
+#' correlationDT <- correlation(
+#'      diabImmune_genus, 
+#'      method = 'spearman', 
+#'      format = 'data.table'
+#' )
+#' 
+#' correlationOutput <- correlation(
+#'      diabImmune_genus, 
+#'      method = 'spearman', 
+#'      format = 'ComputeResult'
+#' )
+#' 
+#' alsoCorrelationDT <- getComputeResult(
+#'      correlationOutput, 
+#'      "data.table"
+#' )
 #' @param data1 first dataset. A data.table
 #' @param data2 second dataset. A data.table
 #' @param method string defining the type of correlation to run. 
@@ -74,9 +102,22 @@ veupathUtils::correlation
 #' convenience wrapper around veupathUtils::correlation, with the exception that it additionally supports sparcc.
 #' 
 #' @examples
-#' correlationDT <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'data.table')
-#' correlationOutput <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'ComputeResult')
-#' alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+#' correlationDT <- selfCorrelation(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method = 'sparcc', 
+#'      format = 'data.table'
+#' )
+#' 
+#' correlationOutput <- selfCorrelation(
+#'      getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+#'      method = 'sparcc', 
+#'      format = 'ComputeResult'
+#' )
+#' 
+#' alsoCorrelationDT <- getComputeResult(
+#'      correlationOutput, 
+#'      "data.table"
+#' )
 #' @param data An AbundanceData object
 #' @param method string defining the type of correlation to run. The currently supported values are 'spearman','pearson' and 'sparcc'
 #' @param format string defining the desired format of the result. The currently supported values are 'data.table' and 'ComputeResult'.
diff --git a/man/alphaDiv-methods.Rd b/man/alphaDiv-methods.Rd
index aac6518..61e9362 100644
--- a/man/alphaDiv-methods.Rd
+++ b/man/alphaDiv-methods.Rd
@@ -24,5 +24,8 @@ ComputeResult object
 This function returns alpha diversity values for each sample.
 }
 \examples{
-alphaDivOutput <- alphaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "shannon")
+alphaDivOutput <- alphaDiv(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method = "shannon"
+)
 }
diff --git a/man/betaDiv-methods.Rd b/man/betaDiv-methods.Rd
index 7cd9852..d70ac6a 100644
--- a/man/betaDiv-methods.Rd
+++ b/man/betaDiv-methods.Rd
@@ -27,5 +27,9 @@ ComputeResult object
 This function returns pcoa coordinates calculated from the beta diversity dissimilarity matrix.
 }
 \examples{
-betaDivOutput <- betaDiv(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "bray", k = 2)
+betaDivOutput <- betaDiv(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method = "bray", 
+     k = 2
+)
 }
diff --git a/man/correlation-methods.Rd b/man/correlation-methods.Rd
index 41b360a..a344034 100644
--- a/man/correlation-methods.Rd
+++ b/man/correlation-methods.Rd
@@ -35,8 +35,26 @@ data.frame with correlation coefficients or a ComputeResult object
 This function returns correlation coefficients for variables in one dataset against variables in a second dataset
 }
 \examples{
-diabImmune_genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE)
-correlationDT <- correlation(diabImmune_genus, method = 'spearman', format = 'data.table')
-correlationOutput <- correlation(diabImmune_genus, method = 'spearman', format = 'ComputeResult')
-alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+diabImmune_genus <- getCollection(
+     microbiomeData::DiabImmune, 
+     "16S (V4) Genus", 
+     continuousMetadataOnly = TRUE
+)
+
+correlationDT <- correlation(
+     diabImmune_genus, 
+     method = 'spearman', 
+     format = 'data.table'
+)
+
+correlationOutput <- correlation(
+     diabImmune_genus, 
+     method = 'spearman', 
+     format = 'ComputeResult'
+)
+
+alsoCorrelationDT <- getComputeResult(
+     correlationOutput, 
+     "data.table"
+)
 }
diff --git a/man/getCollection.Rd b/man/getCollection.Rd
index fff77bd..c9a9f3f 100644
--- a/man/getCollection.Rd
+++ b/man/getCollection.Rd
@@ -37,8 +37,28 @@ Get a collection from the Microbiome Dataset. The collection will be returned
 as an AbundanceData, phyloseq, or Collection object.
 }
 \examples{
-genus <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus")
-genus_phyloseq <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "phyloseq")
-genus_continuous <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", continuousMetadataOnly = TRUE) ## to pass to correlation method
-genus_collection <- getCollection(microbiomeData::DiabImmune, "16S (V4) Genus", format = "Collection") ## with no metadata
+genus <- getCollection(
+     microbiomeData::DiabImmune, 
+     "16S (V4) Genus"
+)
+
+genus_phyloseq <- getCollection(
+     microbiomeData::DiabImmune, 
+     "16S (V4) Genus", 
+     format = "phyloseq"
+)
+
+## to pass to correlation method, we want only continuous metadata
+genus_continuous <- getCollection(
+     microbiomeData::DiabImmune, 
+     "16S (V4) Genus", 
+     continuousMetadataOnly = TRUE
+) 
+
+## with no metadata
+genus_collection <- getCollection(
+     microbiomeData::DiabImmune, 
+     "16S (V4) Genus", 
+     format = "Collection"
+)
 }
diff --git a/man/getComputeResultWithMetadata.Rd b/man/getComputeResultWithMetadata.Rd
index de6eb36..7e7ce69 100644
--- a/man/getComputeResultWithMetadata.Rd
+++ b/man/getComputeResultWithMetadata.Rd
@@ -56,5 +56,10 @@ alphaDivOutput <- MicrobiomeDB::alphaDiv(
      method='shannon', 
      verbose=FALSE
 )
-alphaDivDT <- getComputeResultWithMetadata(alphaDivOutput, microbiomeData::DiabImmune, metadataVariables = c('country', 'delivery_mode'))
+
+alphaDivDT <- getComputeResultWithMetadata(
+     alphaDivOutput, 
+     microbiomeData::DiabImmune, 
+     metadataVariables = c('country', 'delivery_mode')
+)
 }
diff --git a/man/rankedAbundance-methods.Rd b/man/rankedAbundance-methods.Rd
index 6e62df3..f51194c 100644
--- a/man/rankedAbundance-methods.Rd
+++ b/man/rankedAbundance-methods.Rd
@@ -27,5 +27,8 @@ ComputeResult object
 This function returns abundances, ranked by a selected ranking function
 }
 \examples{
-rankedAbundOutput <- rankedAbundance(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = "median")
+rankedAbundOutput <- rankedAbundance(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method = "median"
+)
 }
diff --git a/man/selfCorrelation-methods.Rd b/man/selfCorrelation-methods.Rd
index 290b21c..36b6b8f 100644
--- a/man/selfCorrelation-methods.Rd
+++ b/man/selfCorrelation-methods.Rd
@@ -31,7 +31,20 @@ This function returns correlation coefficients for variables in one AbundanceDat
 convenience wrapper around veupathUtils::correlation, with the exception that it additionally supports sparcc.
 }
 \examples{
-correlationDT <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'data.table')
-correlationOutput <- selfCorrelation(getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), method = 'sparcc', format = 'ComputeResult')
-alsoCorrelationDT <- getComputeResult(correlationOutput, "data.table")
+correlationDT <- selfCorrelation(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method = 'sparcc', 
+     format = 'data.table'
+)
+
+correlationOutput <- selfCorrelation(
+     getCollection(microbiomeData::DiabImmune, "16S (V4) Genus"), 
+     method = 'sparcc', 
+     format = 'ComputeResult'
+)
+
+alsoCorrelationDT <- getComputeResult(
+     correlationOutput, 
+     "data.table"
+)
 }