diff --git a/.gitignore b/.gitignore index 81da5b07..3afddc3b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ .Rhistory .RData inst/shinyApp/db.properties.template - +dbprops.txt # IDE - VSCode .vscode/ # !.vscode/settings.json diff --git a/R/ReturnPathwaysEnrich_InputAnalytes.R b/R/ReturnPathwaysEnrich_InputAnalytes.R index 556e70db..2d9d5266 100644 --- a/R/ReturnPathwaysEnrich_InputAnalytes.R +++ b/R/ReturnPathwaysEnrich_InputAnalytes.R @@ -9,14 +9,14 @@ #' @param min_path_size the minimum number of pathway members (genes and metabolites) to include the pathway in the output (default = 5) #' @param max_path_size the maximum number of pathway memnbers (genes and metaboltes) to include the pathway in the output (default = 150) #' @param background_type type of background that is input by the user. Opions are "database" if user wants all -#' analytes from the RaMP database will be used; "file", if user wnats to input a file with a list of background +#' analytes from the RaMP database will be used; "file", if user wants to input a file with a list of background #' analytes; "list", if user wants to input a vector of analyte IDs; "biospecimen", if user wants to specify a -#' biospecimen type (e.g. blood, adipose, etc.) and have those biospecimen-specific analytes used. For genes, +#' biospecimen type (e.g. blood, adipose tissue, etc.) and have those biospecimen-specific analytes used. For genes, #' only the "database" option is used. #' @param background background to be used for Fisher's tests. 
If parameter 'background_type="database"', this parameter -#' is ignored (default=NULL); if parameter 'background_type= "file"', then 'background' should be a file name (with +#' is ignored (default="database"); if parameter 'background_type= "file"', then 'background' should be a file name (with #' directory); if 'background_type="list"', then 'background' should be a vector of RaMP IDs; if 'backgroud_type="biospecimen"' -#' then users should specify one of the following: "Blood", "Adipose", "Heart", "Urine", "Brain", "Liver", "Kidney", +#' then users should specify one of the following: "Blood", "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", #' "Saliva", and "Feces" #' @return a dataframe with columns containing pathway ID, fisher's p value, user analytes in pathway, and total analytes in pathway @@ -25,7 +25,7 @@ runFisherTest <- function(analytes, NameOrIds = "ids", analyte_type = "metabolites", MCall = F, alternative = "less", min_path_size=5, max_path_size=150, - background_type="database", background="NULL") { + background_type="database", background="database") { now <- proc.time() print("Fisher Testing ......") @@ -462,16 +462,18 @@ runFisherTest <- function(analytes, #' @param max_path_size the maximum number of pathway memnbers (genes and metaboltes) to include the pathway in the output (default = 150) #' @param includeRaMPids include internal RaMP identifiers (default is "FALSE") #' @param background_type type of background that is input by the user. Opions are "database" if user wants all -#' analytes from the RaMP database will be used; "file", if user wnats to input a file with a list of background +#' analytes from the RaMP database to be used as background; "file", if user wants to input a file path with a list of background #' analytes; "list", if user wants to input a vector of analyte IDs; "biospecimen", if user wants to specify a -#' biospecimen type (e.g. blood, adipose, etc.) and have those biospecimen-specific analytes used. 
For genes, +#' biospecimen type (e.g. blood, adipose tissue, etc.) and have those biospecimen-specific analytes used. For genes, #' only the "database" option is used. #' @param background background to be used for Fisher's tests. If parameter 'background_type="database"', this parameter -#' is ignored (default=NULL); if parameter 'background_type= "file"', then 'background' should be a file name (with +#' is ignored (default="database"); if parameter 'background_type= "file"', then 'background' should be a file name (with #' directory); if 'background_type="list"', then 'background' should be a vector of RaMP IDs; if 'backgroud_type="biospecimen"' -#' then users should specify one of the following: "Blood", "Adipose", "Heart", "Urine", "Brain", "Liver", "Kidney", +#' then users should specify one of the following: "Blood", "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", #' "Saliva", and "Feces" -#' @return a list containing two entries: [[1]] fishresults, a dataframe containing pathways with Fisher's p values (raw and with FDR and Holm adjustment), number of user analytes in pathway, total number of analytes in pathway, and pathway source ID/database. [[2]] analyte_type, a string specifying the type of analyte input into the function ("genes", "metabolites", or "both") +#' @return a list containing two entries: [[1]] fishresults, a dataframe containing pathways with Fisher's p values +#' (raw and with FDR and Holm adjustment), number of user analytes in pathway, total number of analytes in pathway, +#' and pathway source ID/database. 
[[2]] analyte_type, a string specifying the type of analyte input into the function ("genes", "metabolites", or "both") #' @examples #' \dontrun{ #' pkg.globals <- setConnectionToRaMP( @@ -494,7 +496,7 @@ runCombinedFisherTest <- function(analytes, max_path_size = 150, includeRaMPids = FALSE, background_type = "database", - background = NULL) { + background = "database") { G <- M <- 0 @@ -520,24 +522,24 @@ runCombinedFisherTest <- function(analytes, } # Grab pathways that contain genes to run Fisher on genes - ## fishgene <- pathwaydf[grep("RAMP_G_", pathwaydf$rampId), ] - ## Genes are not evaluated if custom background is specified - if(background_type == "database"){ - print("Running Fisher's tests on genes") - outgene <- runFisherTest( - analytes = analytes, - analyte_type = "genes", - total_genes = total_genes, - MCall = MCall, - min_path_size = min_path_size, - max_path_size = max_path_size - ) - pathwaydf_gene <- outgene[[2]] - outgene <- outgene[[1]] - }else{ - outgene <- NULL - pathwaydf_gene <- NULL - } + ## fishgene <- pathwaydf[grep("RAMP_G_", pathwaydf$rampId), ] + ## Genes are not evaluated if custom background is specified + if(background_type == "database"){ + print("Running Fisher's tests on genes") + outgene <- runFisherTest( + analytes = analytes, + analyte_type = "genes", + total_genes = total_genes, + MCall = MCall, + min_path_size = min_path_size, + max_path_size = max_path_size + ) + pathwaydf_gene <- outgene[[2]] + outgene <- outgene[[1]] + }else{ + outgene <- NULL + pathwaydf_gene <- NULL + } # if no ids map to pathways, return an empty result. 
if((is.null(pathwaydf_metab) || nrow(pathwaydf_metab) < 1) && @@ -568,7 +570,7 @@ runCombinedFisherTest <- function(analytes, out[keepers, ], by = "pathwayRampId" ) - } else if (!is.null(outgene) & is.null(outmetab)) { + } else if (!is.null(outgene) && is.null(outmetab)) { out <- outgene fdr <- stats::p.adjust(out$Pval, method = "fdr") out <- cbind(out, fdr) @@ -821,11 +823,20 @@ getPathwayFromAnalyte <- function(analytes = "none", findCluster <- function(fishers_df, perc_analyte_overlap = 0.5, min_pathway_tocluster = 2, perc_pathway_overlap = 0.5) { + print("Clustering pathways...") + if (perc_analyte_overlap <= 0 || perc_analyte_overlap >= 1 || perc_pathway_overlap <= 0 || perc_pathway_overlap >= 1) { - return(NULL) + warning("No Clustering. perc_analyte_overlap and percent_pathway_overlap must bee in the range of (0,1), exclusive (not exactly 0 or 1).") + return(fishers_df) } + + if(is.null(fishers_df$fishresults) || nrow(fishers_df$fishresults) < 1) { + warning("The contained input pathway dataframe is empty (fishers_df$fishresults). Returning input result without clustering.") + return(fishers_df) + } + analyte_type <- fishers_df$analyte_type fishers_df <- fishers_df$fishresults list_pathways <- fishers_df %>% dplyr::pull("pathwayId") diff --git a/R/rampChemClassQueries.R b/R/rampChemClassQueries.R index b7cb2728..43aef833 100644 --- a/R/rampChemClassQueries.R +++ b/R/rampChemClassQueries.R @@ -4,11 +4,12 @@ #' #' @param mets a list object of source prepended metaboite ids, representing a metabolite set of interest #' @param background an optional list of source prepended metaboite ids to be used as the background reference of -#' metabolites for enrichment. The background can be either a list of ids or can be a file name containing the id list, -#' one id per column, no file header rows. +#' metabolites for enrichment. 
The background can be either a list of ids, a file name containing the id list, +#' one id per column (no file header row) or a specified biospecimen type (available biospecimen types: "Blood", +#' "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", "Saliva", or "Feces"). #' @param background_type one of 'database' (all analytes in the RaMP Database), 'list' (a list of input ids), -#' or 'file' in which case the background parameter will be a file name, or 'biospecimin' where the specified background parameter is -#' a RaMP HMDB metabolite ontology term (use RaMP::getOntologies() to see a list of available ontology terms). +#' or 'file' in which case the background parameter will be a file path, or 'biospecimen' where the specified background parameter is +#' a RaMP HMDB metabolite ontology term (see background parameter, above, for the most common biospecimen background values). #' @param includeRaMPids include internal RaMP identifiers (default is "FALSE") #' @return Returns chemcial class information data including class count tallies and comparisons between metabolites of interest and the metabolite population, #' metabolite mappings to classes, and query summary report indicating the number of input metabolites that were resolved and listing those metabolite ids @@ -65,7 +66,7 @@ #' metClassResult$query_report #'} #' @export -chemicalClassSurvey <- function(mets, background = "NULL", background_type="database", includeRaMPids = FALSE){ +chemicalClassSurvey <- function(mets, background = "database", background_type="database", includeRaMPids = FALSE){ conn <- connectToRaMP() print("Starting Chemical Class Survey") @@ -183,11 +184,12 @@ chemicalClassSurvey <- function(mets, background = "NULL", background_type="data #' #' @param mets a vector of source prepended metabolite ids #' @param background an optional list of source prepended metaboite ids to be used as the background reference of -#' metabolites for enrichment. 
The background can be either a list of ids or can be a file name containing the id list, -#' one id per column, no file header rows. +#' metabolites for enrichment. The background can be either a list of ids, a file name containing the id list, +#' one id per column (no file header row) or a specified biospecimen type (available biospecimen types: "Blood", +#' "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", "Saliva", or "Feces"). #' @param background_type one of 'database' (all analytes in the RaMP Database), 'list' (a list of input ids), -#' or 'file' in which case the background parameter will be a file name, or 'biospecimin' where the specified background parameter is -#' a RaMP HMDB metabolite ontology term (use RaMP::getOntologies() to see a list of available ontology terms). +#' or 'file' in which case the background parameter will be a file path, or 'biospecimen' where the specified background parameter is +#' a RaMP HMDB metabolite ontology term (see background parameter, above, for the most common biospecimen background values). #' @return a list of dataframes, each holding chemical classs enrichment statistics for specific chemical classification systems, #' such as HMDB Classyfire class categories and LIPIDMAPS class categories. 
The results list chemical classes, metabolite hits counts, #' Fisher Exact p-values and Benjamini-Hochberg corrected p-values (FDR estimates) @@ -216,7 +218,7 @@ chemicalClassSurvey <- function(mets, background = "NULL", background_type="data #' enrichedClassStats <- chemicalClassEnrichment(mets = metList) #'} #' @export -chemicalClassEnrichment <- function(mets, background = "NULL", background_type = "list") { +chemicalClassEnrichment <- function(mets, background = "database", background_type = "database") { print("Starting Chemical Class Enrichment") classData <- chemicalClassSurvey(mets = mets, diff --git a/man/chemicalClassEnrichment.Rd b/man/chemicalClassEnrichment.Rd index 142f1548..a0307b23 100644 --- a/man/chemicalClassEnrichment.Rd +++ b/man/chemicalClassEnrichment.Rd @@ -5,18 +5,23 @@ \title{Returns chemical class information comparing a metabolite subset to a metabolite population, including Fisher Exact Test enrichment p-values and FDR values.} \usage{ -chemicalClassEnrichment(mets, background = "NULL", background_type = "list") +chemicalClassEnrichment( + mets, + background = "database", + background_type = "database" +) } \arguments{ \item{mets}{a vector of source prepended metabolite ids} \item{background}{an optional list of source prepended metaboite ids to be used as the background reference of -metabolites for enrichment. The background can be either a list of ids or can be a file name containing the id list, -one id per column, no file header rows.} +metabolites for enrichment. 
The background can be either a list of ids, a file name containing the id list, +one id per column (no file header row) or a specified biospecimen type (available biospecimen types: "Blood", +"Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", "Saliva", or "Feces").} \item{background_type}{one of 'database' (all analytes in the RaMP Database), 'list' (a list of input ids), -or 'file' in which case the background parameter will be a file name, or 'biospecimin' where the specified background parameter is -a RaMP HMDB metabolite ontology term (use RaMP::getOntologies() to see a list of available ontology terms).} +or 'file' in which case the background parameter will be a file path, or 'biospecimen' where the specified background parameter is +a RaMP HMDB metabolite ontology term (see background parameter, above, for the most common biospecimen background values).} } \value{ a list of dataframes, each holding chemical classs enrichment statistics for specific chemical classification systems, diff --git a/man/chemicalClassSurvey.Rd b/man/chemicalClassSurvey.Rd index 426f15c6..cf85cc11 100644 --- a/man/chemicalClassSurvey.Rd +++ b/man/chemicalClassSurvey.Rd @@ -6,7 +6,7 @@ \usage{ chemicalClassSurvey( mets, - background = "NULL", + background = "database", background_type = "database", includeRaMPids = FALSE ) @@ -15,12 +15,13 @@ chemicalClassSurvey( \item{mets}{a list object of source prepended metaboite ids, representing a metabolite set of interest} \item{background}{an optional list of source prepended metaboite ids to be used as the background reference of -metabolites for enrichment. The background can be either a list of ids or can be a file name containing the id list, -one id per column, no file header rows.} +metabolites for enrichment. 
The background can be either a list of ids, a file name containing the id list, +one id per column (no file header row) or a specified biospecimen type (available biospecimen types: "Blood", +"Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", "Saliva", or "Feces").} \item{background_type}{one of 'database' (all analytes in the RaMP Database), 'list' (a list of input ids), -or 'file' in which case the background parameter will be a file name, or 'biospecimin' where the specified background parameter is -a RaMP HMDB metabolite ontology term (use RaMP::getOntologies() to see a list of available ontology terms).} +or 'file' in which case the background parameter will be a file path, or 'biospecimen' where the specified background parameter is +a RaMP HMDB metabolite ontology term (see background parameter, above, for the most common biospecimen background values).} \item{includeRaMPids}{include internal RaMP identifiers (default is "FALSE")} } diff --git a/man/runCombinedFisherTest.Rd b/man/runCombinedFisherTest.Rd index 75db6c90..1427a4b4 100644 --- a/man/runCombinedFisherTest.Rd +++ b/man/runCombinedFisherTest.Rd @@ -16,7 +16,7 @@ runCombinedFisherTest( max_path_size = 150, includeRaMPids = FALSE, background_type = "database", - background = NULL + background = "database" ) } \arguments{ @@ -40,19 +40,21 @@ runCombinedFisherTest( \item{includeRaMPids}{include internal RaMP identifiers (default is "FALSE")} \item{background_type}{type of background that is input by the user. Opions are "database" if user wants all -analytes from the RaMP database will be used; "file", if user wnats to input a file with a list of background +analytes from the RaMP database to be used as background; "file", if user wants to input a file path with a list of background analytes; "list", if user wants to input a vector of analyte IDs; "biospecimen", if user wants to specify a -biospecimen type (e.g. blood, adipose, etc.) and have those biospecimen-specific analytes used. 
For genes, +biospecimen type (e.g. blood, adipose tissue, etc.) and have those biospecimen-specific analytes used. For genes, only the "database" option is used.} \item{background}{background to be used for Fisher's tests. If parameter 'background_type="database"', this parameter -is ignored (default=NULL); if parameter 'background_type= "file"', then 'background' should be a file name (with +is ignored (default="database"); if parameter 'background_type= "file"', then 'background' should be a file name (with directory); if 'background_type="list"', then 'background' should be a vector of RaMP IDs; if 'backgroud_type="biospecimen"' -then users should specify one of the following: "Blood", "Adipose", "Heart", "Urine", "Brain", "Liver", "Kidney", +then users should specify one of the following: "Blood", "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", "Saliva", and "Feces"} } \value{ -a list containing two entries: [[1]] fishresults, a dataframe containing pathways with Fisher's p values (raw and with FDR and Holm adjustment), number of user analytes in pathway, total number of analytes in pathway, and pathway source ID/database. [[2]] analyte_type, a string specifying the type of analyte input into the function ("genes", "metabolites", or "both") +a list containing two entries: [[1]] fishresults, a dataframe containing pathways with Fisher's p values +(raw and with FDR and Holm adjustment), number of user analytes in pathway, total number of analytes in pathway, +and pathway source ID/database. 
[[2]] analyte_type, a string specifying the type of analyte input into the function ("genes", "metabolites", or "both") } \description{ Do fisher test for only one pathway from search result diff --git a/man/runFisherTest.Rd b/man/runFisherTest.Rd index d404a5c8..e739e080 100644 --- a/man/runFisherTest.Rd +++ b/man/runFisherTest.Rd @@ -15,7 +15,7 @@ runFisherTest( min_path_size = 5, max_path_size = 150, background_type = "database", - background = "NULL" + background = "database" ) } \arguments{ @@ -36,15 +36,15 @@ runFisherTest( \item{max_path_size}{the maximum number of pathway memnbers (genes and metaboltes) to include the pathway in the output (default = 150)} \item{background_type}{type of background that is input by the user. Opions are "database" if user wants all -analytes from the RaMP database will be used; "file", if user wnats to input a file with a list of background +analytes from the RaMP database will be used; "file", if user wants to input a file with a list of background analytes; "list", if user wants to input a vector of analyte IDs; "biospecimen", if user wants to specify a -biospecimen type (e.g. blood, adipose, etc.) and have those biospecimen-specific analytes used. For genes, +biospecimen type (e.g. blood, adipose tissue, etc.) and have those biospecimen-specific analytes used. For genes, only the "database" option is used.} \item{background}{background to be used for Fisher's tests. 
If parameter 'background_type="database"', this parameter -is ignored (default=NULL); if parameter 'background_type= "file"', then 'background' should be a file name (with +is ignored (default="database"); if parameter 'background_type= "file"', then 'background' should be a file name (with directory); if 'background_type="list"', then 'background' should be a vector of RaMP IDs; if 'backgroud_type="biospecimen"' -then users should specify one of the following: "Blood", "Adipose", "Heart", "Urine", "Brain", "Liver", "Kidney", +then users should specify one of the following: "Blood", "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney", "Saliva", and "Feces"} } \value{