Skip to content

Commit

Permalink
Merge pull request #31 from ncats/dev
Browse files Browse the repository at this point in the history
update default background value for enrichment to database, patch fin…
  • Loading branch information
Mathelab authored Apr 6, 2022
2 parents 64a2d6c + e36c7f2 commit bf4ff54
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
.Rhistory
.RData
inst/shinyApp/db.properties.template

dbprops.txt
# IDE - VSCode
.vscode/
# !.vscode/settings.json
Expand Down
73 changes: 42 additions & 31 deletions R/ReturnPathwaysEnrich_InputAnalytes.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
#' @param min_path_size the minimum number of pathway members (genes and metabolites) to include the pathway in the output (default = 5)
#' @param max_path_size the maximum number of pathway members (genes and metabolites) to include the pathway in the output (default = 150)
#' @param background_type type of background that is input by the user. Options are "database" if user wants all
#' analytes from the RaMP database will be used; "file", if user wnats to input a file with a list of background
#' analytes from the RaMP database will be used; "file", if user wants to input a file with a list of background
#' analytes; "list", if user wants to input a vector of analyte IDs; "biospecimen", if user wants to specify a
#' biospecimen type (e.g. blood, adipose, etc.) and have those biospecimen-specific analytes used. For genes,
#' biospecimen type (e.g. blood, adipose tissue, etc.) and have those biospecimen-specific analytes used. For genes,
#' only the "database" option is used.
#' @param background background to be used for Fisher's tests. If parameter 'background_type="database"', this parameter
#' is ignored (default=NULL); if parameter 'background_type= "file"', then 'background' should be a file name (with
#' is ignored (default="database"); if parameter 'background_type= "file"', then 'background' should be a file name (with
#' directory); if 'background_type="list"', then 'background' should be a vector of RaMP IDs; if 'background_type="biospecimen"'
#' then users should specify one of the following: "Blood", "Adipose", "Heart", "Urine", "Brain", "Liver", "Kidney",
#' then users should specify one of the following: "Blood", "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney",
#' "Saliva", and "Feces"
#' @return a dataframe with columns containing pathway ID, fisher's p value, user analytes in pathway, and total analytes in pathway

Expand All @@ -25,7 +25,7 @@ runFisherTest <- function(analytes,
NameOrIds = "ids",
analyte_type = "metabolites",
MCall = F, alternative = "less", min_path_size=5, max_path_size=150,
background_type="database", background="NULL") {
background_type="database", background="database") {

now <- proc.time()
print("Fisher Testing ......")
Expand Down Expand Up @@ -462,16 +462,18 @@ runFisherTest <- function(analytes,
#' @param max_path_size the maximum number of pathway members (genes and metabolites) to include the pathway in the output (default = 150)
#' @param includeRaMPids include internal RaMP identifiers (default is "FALSE")
#' @param background_type type of background that is input by the user. Options are "database" if user wants all
#' analytes from the RaMP database will be used; "file", if user wnats to input a file with a list of background
#' analytes from the RaMP database to be used as background; "file", if user wants to input a file path with a list of background
#' analytes; "list", if user wants to input a vector of analyte IDs; "biospecimen", if user wants to specify a
#' biospecimen type (e.g. blood, adipose, etc.) and have those biospecimen-specific analytes used. For genes,
#' biospecimen type (e.g. blood, adipose tissue, etc.) and have those biospecimen-specific analytes used. For genes,
#' only the "database" option is used.
#' @param background background to be used for Fisher's tests. If parameter 'background_type="database"', this parameter
#' is ignored (default=NULL); if parameter 'background_type= "file"', then 'background' should be a file name (with
#' is ignored (default="database"); if parameter 'background_type= "file"', then 'background' should be a file name (with
#' directory); if 'background_type="list"', then 'background' should be a vector of RaMP IDs; if 'background_type="biospecimen"'
#' then users should specify one of the following: "Blood", "Adipose", "Heart", "Urine", "Brain", "Liver", "Kidney",
#' then users should specify one of the following: "Blood", "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney",
#' "Saliva", and "Feces"
#' @return a list containing two entries: [[1]] fishresults, a dataframe containing pathways with Fisher's p values (raw and with FDR and Holm adjustment), number of user analytes in pathway, total number of analytes in pathway, and pathway source ID/database. [[2]] analyte_type, a string specifying the type of analyte input into the function ("genes", "metabolites", or "both")
#' @return a list containing two entries: [[1]] fishresults, a dataframe containing pathways with Fisher's p values
#' (raw and with FDR and Holm adjustment), number of user analytes in pathway, total number of analytes in pathway,
#' and pathway source ID/database. [[2]] analyte_type, a string specifying the type of analyte input into the function ("genes", "metabolites", or "both")
#' @examples
#' \dontrun{
#' pkg.globals <- setConnectionToRaMP(
Expand All @@ -494,7 +496,7 @@ runCombinedFisherTest <- function(analytes,
max_path_size = 150,
includeRaMPids = FALSE,
background_type = "database",
background = NULL) {
background = "database") {

G <- M <- 0

Expand All @@ -520,24 +522,24 @@ runCombinedFisherTest <- function(analytes,
}

# Grab pathways that contain genes to run Fisher on genes
## fishgene <- pathwaydf[grep("RAMP_G_", pathwaydf$rampId), ]
## Genes are not evaluated if custom background is specified
if(background_type == "database"){
print("Running Fisher's tests on genes")
outgene <- runFisherTest(
analytes = analytes,
analyte_type = "genes",
total_genes = total_genes,
MCall = MCall,
min_path_size = min_path_size,
max_path_size = max_path_size
)
pathwaydf_gene <- outgene[[2]]
outgene <- outgene[[1]]
}else{
outgene <- NULL
pathwaydf_gene <- NULL
}
## fishgene <- pathwaydf[grep("RAMP_G_", pathwaydf$rampId), ]
## Genes are not evaluated if custom background is specified
if(background_type == "database"){
print("Running Fisher's tests on genes")
outgene <- runFisherTest(
analytes = analytes,
analyte_type = "genes",
total_genes = total_genes,
MCall = MCall,
min_path_size = min_path_size,
max_path_size = max_path_size
)
pathwaydf_gene <- outgene[[2]]
outgene <- outgene[[1]]
}else{
outgene <- NULL
pathwaydf_gene <- NULL
}

# if no ids map to pathways, return an empty result.
if((is.null(pathwaydf_metab) || nrow(pathwaydf_metab) < 1) &&
Expand Down Expand Up @@ -568,7 +570,7 @@ runCombinedFisherTest <- function(analytes,
out[keepers, ],
by = "pathwayRampId"
)
} else if (!is.null(outgene) & is.null(outmetab)) {
} else if (!is.null(outgene) && is.null(outmetab)) {
out <- outgene
fdr <- stats::p.adjust(out$Pval, method = "fdr")
out <- cbind(out, fdr)
Expand Down Expand Up @@ -821,11 +823,20 @@ getPathwayFromAnalyte <- function(analytes = "none",

findCluster <- function(fishers_df, perc_analyte_overlap = 0.5,
min_pathway_tocluster = 2, perc_pathway_overlap = 0.5) {

print("Clustering pathways...")

if (perc_analyte_overlap <= 0 || perc_analyte_overlap >= 1 ||
perc_pathway_overlap <= 0 || perc_pathway_overlap >= 1) {
return(NULL)
warning("No Clustering. perc_analyte_overlap and percent_pathway_overlap must bee in the range of (0,1), exclusive (not exactly 0 or 1).")
return(fishers_df)
}

if(is.null(fishers_df$fishresults) || nrow(fishers_df$fishresults) < 1) {
warning("The contained input pathway dataframe is empty (fishers_df$fishresults). Returning input result without clustering.")
return(fishers_df)
}

analyte_type <- fishers_df$analyte_type
fishers_df <- fishers_df$fishresults
list_pathways <- fishers_df %>% dplyr::pull("pathwayId")
Expand Down
22 changes: 12 additions & 10 deletions R/rampChemClassQueries.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
#'
#' @param mets a list object of source prepended metabolite ids, representing a metabolite set of interest
#' @param background an optional list of source prepended metabolite ids to be used as the background reference of
#' metabolites for enrichment. The background can be either a list of ids or can be a file name containing the id list,
#' one id per column, no file header rows.
#' metabolites for enrichment. The background can be either a list of ids, a file name containing the id list,
#' one id per column (no file header row) or a specified biospecimen type (available biospecimen types: "Blood",
#' "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney","Saliva", or "Feces").
#' @param background_type one of 'database' (all analytes in the RaMP Database), 'list' (a list of input ids),
#' or 'file' in which case the background parameter will be a file name, or 'biospecimin' where the specified background parameter is
#' a RaMP HMDB metabolite ontology term (use RaMP::getOntologies() to see a list of available ontology terms).
#' or 'file' in which case the background parameter will be a file path, or 'biospecimen' where the specified background parameter is
#' a RaMP HMDB metabolite ontology term (see background parameter, above, for the most common biospecimen background values).
#' @param includeRaMPids include internal RaMP identifiers (default is "FALSE")
#' @return Returns chemical class information data including class count tallies and comparisons between metabolites of interest and the metabolite population,
#' metabolite mappings to classes, and query summary report indicating the number of input metabolites that were resolved and listing those metabolite ids
Expand Down Expand Up @@ -65,7 +66,7 @@
#' metClassResult$query_report
#'}
#' @export
chemicalClassSurvey <- function(mets, background = "NULL", background_type="database", includeRaMPids = FALSE){
chemicalClassSurvey <- function(mets, background = "database", background_type="database", includeRaMPids = FALSE){
conn <- connectToRaMP()
print("Starting Chemical Class Survey")

Expand Down Expand Up @@ -183,11 +184,12 @@ chemicalClassSurvey <- function(mets, background = "NULL", background_type="data
#'
#' @param mets a vector of source prepended metabolite ids
#' @param background an optional list of source prepended metabolite ids to be used as the background reference of
#' metabolites for enrichment. The background can be either a list of ids or can be a file name containing the id list,
#' one id per column, no file header rows.
#' metabolites for enrichment. The background can be either a list of ids, a file name containing the id list,
#' one id per column (no file header row) or a specified biospecimen type (available biospecimen types: "Blood",
#' "Adipose tissue", "Heart", "Urine", "Brain", "Liver", "Kidney","Saliva", or "Feces").
#' @param background_type one of 'database' (all analytes in the RaMP Database), 'list' (a list of input ids),
#' or 'file' in which case the background parameter will be a file name, or 'biospecimin' where the specified background parameter is
#' a RaMP HMDB metabolite ontology term (use RaMP::getOntologies() to see a list of available ontology terms).
#' or 'file' in which case the background parameter will be a file path, or 'biospecimen' where the specified background parameter is
#' a RaMP HMDB metabolite ontology term (see background parameter, above, for the most common biospecimen background values).
#' @return a list of dataframes, each holding chemical class enrichment statistics for specific chemical classification systems,
#' such as HMDB Classyfire class categories and LIPIDMAPS class categories. The results list chemical classes, metabolite hits counts,
#' Fisher Exact p-values and Benjamini-Hochberg corrected p-values (FDR estimates)
Expand Down Expand Up @@ -216,7 +218,7 @@ chemicalClassSurvey <- function(mets, background = "NULL", background_type="data
#' enrichedClassStats <- chemicalClassEnrichment(mets = metList)
#'}
#' @export
chemicalClassEnrichment <- function(mets, background = "NULL", background_type = "list") {
chemicalClassEnrichment <- function(mets, background = "database", background_type = "database") {
print("Starting Chemical Class Enrichment")

classData <- chemicalClassSurvey(mets = mets,
Expand Down
15 changes: 10 additions & 5 deletions man/chemicalClassEnrichment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions man/chemicalClassSurvey.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 8 additions & 6 deletions man/runCombinedFisherTest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/runFisherTest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bf4ff54

Please sign in to comment.