From 71dfbef5927dd2bd4fcff357a961011c38e54517 Mon Sep 17 00:00:00 2001 From: johnbraisted Date: Thu, 7 Sep 2023 19:02:05 -0400 Subject: [PATCH 1/6] copy pw_enrich_dev Fisher optimizations to this patch --- R/ReturnPathwaysEnrich_InputAnalytes.R | 48 ++++++++++++++++++-------- R/plottingFunctions.R | 14 +++++--- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/R/ReturnPathwaysEnrich_InputAnalytes.R b/R/ReturnPathwaysEnrich_InputAnalytes.R index efaca8ba..a0010eca 100644 --- a/R/ReturnPathwaysEnrich_InputAnalytes.R +++ b/R/ReturnPathwaysEnrich_InputAnalytes.R @@ -88,12 +88,22 @@ runFisherTest <- function(analytes, if (biospecimen == "Adipose") { biospecimen <- "Adipose tissue" } + + # we don't need all fields from all tables joined # Get metabolites that belong to a specific biospecimen + # query <- paste0( + # "SELECT analytehasontology.*, ontology.*, analytehaspathway.* from analytehasontology, ontology, analytehaspathway where ontology.commonName in ('", + # biospecimen, + # "') and ontology.rampOntologyId = analytehasontology.rampOntologyId and analytehasontology.rampCompoundId = analytehaspathway.rampId" + # ) + + # less data pull back query <- paste0( - "SELECT analytehasontology.*, ontology.*, analytehaspathway.* from analytehasontology, ontology, analytehaspathway where ontology.commonName in ('", + "SELECT analytehaspathway.* from analytehasontology, ontology, analytehaspathway where ontology.commonName in ('", biospecimen, - "') and ontology.rampOntologyId = analytehasontology.rampOntologyId and analytehasontology.rampCompoundId = analytehaspathway.rampId" + "') and analytehasontology.rampOntologyId = ontology.rampOntologyId and analytehasontology.rampCompoundId = analytehaspathway.rampId" ) + con <- connectToRaMP() backgrounddf <- RMariaDB::dbGetQuery(con, query) RMariaDB::dbDisconnect(con) @@ -152,7 +162,7 @@ runFisherTest <- function(analytes, # Get the total number of metabolites that are mapped to pathways in RaMP (that's the default background) # added conditional to not pull hmdb ids - query <- "select * from analytehaspathway where pathwaySource != 'hmdb';" + query <- "select distinct rampId, pathwaySource from analytehaspathway where pathwaySource != 'hmdb';" con <- connectToRaMP() allids <- RMariaDB::dbGetQuery(con, query) @@ -204,9 +214,13 @@ runFisherTest <- function(analytes, # Loop through each pathway, build the contingency table, and calculate Fisher's Exact # test p-value pval <- totinpath <- userinpath <- pidused <- c() + + pidCount = 0 + for (i in pid) { ids_inpath <- pathwaydf[which(pathwaydf$pathwayRampId == i), "rampId"] + pidCount = pidCount + 1 if (analyte_type == "metabolites") { # Check to make sure that this pathway does have metabolites @@ -219,13 +233,15 @@ runFisherTest <- function(analytes, bg_in_pathway <- length(unique(grep("RAMP_C", ids_inpath_bg, value = TRUE))) } } - inputkegg <- segregated_id_list[[1]][1][[1]] - inputreact <- segregated_id_list[[1]][2][[1]] - inputwiki <- segregated_id_list[[1]][3][[1]] - inputcustom <- segregated_id_list[[1]][[4]] - tot_user_analytes <- length(grep("RAMP_C", unique(pathwaydf$rampId))) - if (background_type != "database") { - tot_bg_analytes <- length(grep("RAMP_C", unique(backgrounddf$rampId))) + if(pidCount == 1) { + inputkegg <- segregated_id_list[[1]][1][[1]] + inputreact <- segregated_id_list[[1]][2][[1]] + inputwiki <- segregated_id_list[[1]][3][[1]] + inputcustom <- segregated_id_list[[1]][[4]] + tot_user_analytes <- length(grep("RAMP_C", unique(pathwaydf$rampId))) + if (background_type != "database") { + tot_bg_analytes <- length(grep("RAMP_C", unique(backgrounddf$rampId))) + } } } else { # if genes # Check to make sure that this pathway does have genes @@ -234,11 +250,13 @@ runFisherTest <- function(analytes, } else { user_in_pathway <- length(unique(grep("RAMP_G", ids_inpath, value = TRUE))) } - inputkegg <- segregated_id_list[[2]][1][[1]] - inputreact <- segregated_id_list[[2]][2][[1]] - inputwiki <- segregated_id_list[[2]][3][[1]] - inputcustom <- segregated_id_list[[2]][[4]] - tot_user_analytes <- length(grep("RAMP_G", unique(pathwaydf$rampId))) + if(pidCount == 1) { + inputkegg <- segregated_id_list[[2]][1][[1]] + inputreact <- segregated_id_list[[2]][2][[1]] + inputwiki <- segregated_id_list[[2]][3][[1]] + inputcustom <- segregated_id_list[[2]][[4]] + tot_user_analytes <- length(grep("RAMP_G", unique(pathwaydf$rampId))) + } ## tot_bg_analytes <- length(grep("RAMP_G", unique(backgrounddf$rampId))) } if ((!is.na(inputkegg$pathwayRampId[1])) && i %in% inputkegg$pathwayRampId) { diff --git a/R/plottingFunctions.R b/R/plottingFunctions.R index 751b23f5..58f44de5 100644 --- a/R/plottingFunctions.R +++ b/R/plottingFunctions.R @@ -65,15 +65,21 @@ plotCataNetwork <- function(catalyzedf = "") { #' @param sig_cutoff Aesthetic, shows pvalue cutoff for significant pathways #' @param interactive If TRUE, return interactive plotly object instead of ggplot object #' @export -pathwayResultsPlot <- function(pathwaysSig, pval = "FDR", perc_analyte_overlap = 0.2, - perc_pathway_overlap = 0.2, min_pathway_tocluster = 3, +pathwayResultsPlot <- function(pathwaysSig, pval = "FDR", perc_analyte_overlap = 0.5, + perc_pathway_overlap = 0.5, min_pathway_tocluster = 3, text_size = 16, sig_cutoff = 0.05, interactive=FALSE) { + + if( !('cluster_assignment' %in% colnames(pathwaysSig$fishresult))) { fishClustering <- findCluster(pathwaysSig, perc_analyte_overlap = perc_analyte_overlap, perc_pathway_overlap = perc_pathway_overlap, min_pathway_tocluster = min_pathway_tocluster - ) - fishresult <- fishClustering$fishresults + ) + fishresult <- fishClustering$fishresults + } else { + fishresult <- pathwaysSig$fishresults + } + if (pathwaysSig$analyte_type == "genes" | pathwaysSig$analyte_type == "metabolites") { inPath <- fishresult$Num_In_Path totPath <- fishresult$Total_In_Path From 8d26ebf43e3c053bbcde0a35369ff6646c8e7e98 Mon Sep 17 00:00:00 2001 From: johnbraisted Date: Tue, 26 Sep 2023 17:19:56 -0400 Subject: [PATCH 2/6] commit reaction queries --- R/rampReactionQueries.R | 251 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 R/rampReactionQueries.R diff --git a/R/rampReactionQueries.R b/R/rampReactionQueries.R new file mode 100644 index 00000000..377a357e --- /dev/null +++ b/R/rampReactionQueries.R @@ -0,0 +1,251 @@ +# RaMP Reaction Queries + +#' getReactionsForAnalytes +#' +#' @param analytes list of analytes +#' @param analyteType analyte type, 'metabolites' (default), 'genes' or 'both' +#' @param namesOrIds indicates if input analyte list contains identifiers or analyte names +#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress) +#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress) +#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions +#' @param rxnDirs character vector of length > 1, specifying reaction directions to return c("UN", "LR", "RL", "BD", "ALL"), default = c("UN"). +#' +#' @return a list of reaction information on each input analyte, separate data.frame for metabolites, genes, and common reactions +#' @export +#' +#' @examples +getReactionsForAnalytes <- function(analytes, analyteType='metabolites', namesOrIds='ids', onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) { + + genes <- data.frame() + metabolites <- data.frame() + mdf <- data.frame() + gdf <- data.frame() + if(namesOrIds == 'ids') { + analyteSourceInfo <- getRampSourceInfoFromAnalyteIDs(analytes) + resultGenes <- analyteSourceInfo[analyteSourceInfo$geneOrCompound == 'gene',] + resultMets <- analyteSourceInfo[analyteSourceInfo$geneOrCompound == 'compound',] + + if(!is.null(resultMets) && nrow(resultMets)>0) { + + print("Retrieving reactions for compounds") + + mets <- resultMets[,c('sourceId', 'rampId')] + rampIds <- unique(unlist(mets$rampId)) + mdf <- getReactionsForRaMPCompoundIds(rampCompoundIds=rampIds, onlyHumanMets=onlyHumanMets, humanProtein=humanProtein, includeTransportRxns=includeTransportRxns, rxnDirs=rxnDirs) + if(nrow(mdf) > 0) { + mdf <- merge(mets, mdf, by.x='rampId', by.y='ramp_cmpd_id') + } + } + + if(!is.null(resultGenes) && nrow(resultGenes)>0) { + + print("Retrieving reactions for genes/proteins") + + genes <- resultGenes[,c('sourceId', 'rampId')] + rampIds <- unique(unlist(genes$rampId)) + gdf <- getReactionsForRaMPGeneIds(rampGeneIds=rampIds, onlyHumanMets=onlyHumanMets, humanProtein=humanProtein, includeTransportRxns) + if(nrow(gdf) > 0) { + gdf <- merge(genes, gdf, by.x='rampId', by.y='ramp_gene_id') + } + } + + # if(!is.null(resultMets) && ncol(resultMets)>0) { + # metatabolites <- resultMets[,c('sourceId', 'rampId')] + # } + + } else { # query on names + # do we have a helper function to get ramp ids from names? + + } + + resultList <- list() + resultList[['met2rxn']] <- mdf + resultList[['prot2rxn']] <- gdf + resultList[['metProteinCommonReactions']] <- data.frame() + + # find common reactions + if(nrow(mdf) > 0 && nrow(gdf) > 0) { + mRampRxnIds <- unlist(mdf$ramp_rxn_id) + gRampRxnIds <- unlist(gdf$ramp_rxn_id) + commonRxnIds <- intersect(mRampRxnIds, gRampRxnIds) + if(length(commonRxnIds) > 0) { + mRxn <- mdf[mdf$ramp_rxn_id %in% commonRxnIds,] + gRxn <- gdf[gdf$ramp_rxn_id %in% commonRxnIds,] + + # one row per reaction, include concat mets, concat proteins + # probably a more streamlined way to do this... + commonRampRxnList <- c() + commonRxnList <- c() + mRxnSourceIds <- c() + mRxnSourceNames <- c() + gRxnSourceIds <- c() + gRxnSourceName <- c() + gRxnUniprot <- c() + commonRxnLabel <- c() + commonRxnHTMLEq <- c() + hasHumanProtein <- c() + onlyHumanMets <- c() + isTransport <- c() + rxnDir <- c() + for(rxn in commonRxnIds) { + commonRampRxnList <- c(commonRampRxnList, rxn) + commonRxnList <- c(commonRxnList, paste0(unique(mRxn$rxn_source_id[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + mRxnSourceIds <- c(mRxnSourceIds, paste0(unique(mRxn$sourceId[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + mRxnSourceNames <- c(mRxnSourceNames, paste0(unique(mRxn$met_name[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + + gRxnSourceIds <- c(gRxnSourceIds, paste0(unique(gRxn$sourceId[gRxn$ramp_rxn_id == rxn]), collapse = ',')) + gRxnUniprot <- c(gRxnUniprot, paste0(unique(gRxn$uniprot[gRxn$ramp_rxn_id == rxn]), collapse = ',')) + gRxnSourceName <- c(gRxnSourceName, paste0(unique(gRxn$protein_name[gRxn$ramp_rxn_id == rxn]), collapse = ',')) + + rxnDir <- c(rxnDir, paste0(unique(mRxn$direction[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + + commonRxnLabel <- c(commonRxnLabel, paste0(unique(mRxn$label[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + commonRxnHTMLEq <- c(commonRxnHTMLEq, paste0(unique(mRxn$html_equation[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + + isTransport <- c(isTransport, paste0(unique(mRxn$is_transport[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + hasHumanProtein <- c(hasHumanProtein, paste0(unique(mRxn$has_human_prot[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + onlyHumanMets <- c(onlyHumanMets, paste0(unique(mRxn$only_human_mets[mRxn$ramp_rxn_id == rxn]), collapse = ',')) + } + commonReactions <- data.frame(list('metabolites' = mRxnSourceIds, 'met_names' = mRxnSourceNames ,'genes' = gRxnSourceIds, + 'uniprot' = gRxnUniprot, 'proteinNames' = gRxnSourceName, + 'reactionId' = commonRxnList, 'rxn_dir' = rxnDir,'rxn_label' = commonRxnLabel, + 'rxn_html_label' = commonRxnHTMLEq, + 'is_transport' = isTransport, + 'has_human_protein' = hasHumanProtein, + 'only_human_mets' = onlyHumanMets)) + + resultList[['metProteinCommonReactions']] <- commonReactions + } + } + + return(resultList) +} + +#' getReactionsForRaMPCompoundIds returns reactions for a collection of ramp compound ids +#' +#' @param rampCompoundIds list of ramp compound ids +#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress) +#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress) +#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions +#' +#' @return returns a dataframe of reaction information for each ramp compound id +#' +#' @examples +getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) { + + idStr <- listToQueryString(rampCompoundIds) + query <- paste0("select mr.ramp_rxn_id, mr.ramp_cmpd_id, mr.met_source_id, mr.substrate_product, mr.is_cofactor, mr.met_name, + mr.ramp_rxn_id, rxn.rxn_source_id, rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation, rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets + from reaction2met mr, reaction rxn + where mr.ramp_cmpd_id in (",idStr,") and rxn.ramp_rxn_id = mr.ramp_rxn_id") + + if(length(rxnDirs) == 1) { + query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'") + } else if(length(rxnDirs)>1) { + query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")") + } else { + print("rxnDirs must be of length > 0") + } + + if(humanProtein) { + query <- paste0(query," and rxn.has_human_prot = 1") + } + + if(onlyHumanMets) { + query <- paste0(query, " and rxn.only_human_mets = 1") + } + + if(!includeTransportRxns) { + query <- paste0(query, " and rxn.is_transport = 0") + } + + df <- RaMP::runQuery(query) + + return(df) +} + + + +#' getReactionsForRaMPGeneIds returns reactions for a collection of ramp compound ids +#' +#' @param rampGeneIds list of ramp compound ids +#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress) +#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress) +#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions +#' +#' @return returns a dataframe of reaction information for each ramp compound id +#' +#' @examples +getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) { + + idStr <- listToQueryString(rampGeneIds) + query <- paste0("select gr.ramp_rxn_id, gr.ramp_gene_id, gr.uniprot, gr.protein_name, + gr.ramp_rxn_id, rxn.rxn_source_id, rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation, rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets + from reaction2protein gr, reaction rxn + where gr.ramp_gene_id in (",idStr,") and rxn.ramp_rxn_id = gr.ramp_rxn_id") + + if(length(rxnDirs) == 1) { + query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'") + } else if(length(rxnDirs)>1) { + query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")") + } else { + print("rxnDirs must be of length > 0") + } + + if(humanProtein) { + query <- paste0(query, " and rxn.has_human_prot = 1") + } + + if(onlyHumanMets) { + query <- paste0(query, " and rxn.only_human_mets = 1") + } + + if(!includeTransportRxns) { + query <- paste0(query, " and rxn.is_transport = 0") + } + + df <- RaMP::runQuery(query) + + return(df) +} + + +##################################################### +################################### +# +# general dev note... perhaps the functions below should be moved to rampQueryHelpers +# +## + + + +#' getRampSourceInfoFromAnalyteIDs Utility method to extract source table information from analyte ids +#' +#' @param analytes list of analyte ids +#' +#' @return returns a dataframe of ramp analyte source information +#' +#' @examples +getRampSourceInfoFromAnalyteIDs <- function(analytes) { + + analyteStr <- listToQueryString(analytes) + + query = paste("select distinct sourceId, rampId, geneOrCompound from source where sourceId in (",analyteStr,")") + + df <- RaMP::runQuery(query) + + return(df) +} + + +#' listToQueryString utility method to convert an id list to a comma separate string, with single quoted values. +#' +#' @param analytes list of analytes (can be names or ids) +#' +#' @return comma separated list of single quoted analyte ids or names +#' +#' @examples +listToQueryString <- function(analytes) { + analyteStr <- paste0("'", paste0(analytes, collapse = "','"), "'", sep="") + return (analyteStr) +} From 65e777e6f1fe52120c00c404093230f0f6de1597 Mon Sep 17 00:00:00 2001 From: johnbraisted Date: Mon, 2 Oct 2023 09:34:12 -0400 Subject: [PATCH 3/6] reaction queries --- R/rampReactionQueries.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/rampReactionQueries.R b/R/rampReactionQueries.R index 377a357e..9db883b4 100644 --- a/R/rampReactionQueries.R +++ b/R/rampReactionQueries.R @@ -159,8 +159,9 @@ getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, hum query <- paste0(query, " and rxn.is_transport = 0") } - df <- RaMP::runQuery(query) - + conn <- connectToRaMP() + df <- RMariaDB::dbGetQuery(conn,query) + RMariaDB::dbDisconnect(conn=conn) return(df) } @@ -204,8 +205,9 @@ getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtei query <- paste0(query, " and rxn.is_transport = 0") } - df <- RaMP::runQuery(query) - + conn <- connectToRaMP() + df <- RMariaDB::dbGetQuery(conn,query) + RMariaDB::dbDisconnect(conn=conn) return(df) } @@ -232,7 +234,9 @@ getRampSourceInfoFromAnalyteIDs <- function(analytes) { query = paste("select distinct sourceId, rampId, geneOrCompound from source where sourceId in (",analyteStr,")") - df <- RaMP::runQuery(query) + conn <- connectToRaMP() + df <- RMariaDB::dbGetQuery(conn,query) + RMariaDB::dbDisconnect(conn=conn) return(df) } From c7eada4b8cc0f2fc9929aa849cbe3df5736e3a50 Mon Sep 17 00:00:00 2001 From: johnbraisted Date: Thu, 12 Oct 2023 09:56:13 -0400 Subject: [PATCH 4/6] add new Rhea reaction queries --- NAMESPACE | 1 + R/ReturnPathwaysEnrich_InputAnalytes.R | 14 ++++++++------ R/rampReactionQueries.R | 10 ---------- man/pathwayResultsPlot.Rd | 4 ++-- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 322e5923..7d4b2537 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(getPathwayFromAnalyte) export(getPathwayNameList) export(getPrefixesFromAnalytes) export(getRaMPAnalyteIntersections) +export(getReactionsForAnalytes) export(pathwayResultsPlot) export(plotCataNetwork) export(rampFastCata) diff --git a/R/ReturnPathwaysEnrich_InputAnalytes.R b/R/ReturnPathwaysEnrich_InputAnalytes.R index a0010eca..0779e4fb 100644 --- a/R/ReturnPathwaysEnrich_InputAnalytes.R +++ b/R/ReturnPathwaysEnrich_InputAnalytes.R @@ -893,12 +893,14 @@ getPathwayFromAnalyte <- function(analytes = "none", return(df2) } - -##' @param analytes a vector of analytes (genes or metabolites) that need to be searched -##' @param pathways If "RaMP" (default), use pathway definitions within RaMP-DB. Else, supply path to gmx file containing custom pathway definitions. GMX files are a tab-separated format that contain one analyte set per column, with the name of the set in the first row, and constituent analytes in subsequent rows -##' @param analyte_type "genes" or "metabolites" -##' @return A pathwaydf compatible with runFisherTest -##' @author Andrew Patt +#' Reads an excel file containing metabolite and gene annotations and filters pathways +#' to those pathways represented by the input analytes. +#' +#' @param analytes a vector of analytes (genes or metabolites) that need to be searched +#' @param pathways If "RaMP" (default), use pathway definitions within RaMP-DB. Else, supply path to gmx file containing custom pathway definitions. GMX files are a tab-separated format that contain one analyte set per column, with the name of the set in the first row, and constituent analytes in subsequent rows +#' @param analyte_type "genes" or "metabolites" +#' @return A pathwaydf compatible with runFisherTest +#' @author Andrew Patt getCustomPathwayFromAnalyte <- function(analytes, pathways, analyte_type) { print("Starting getCustomPathwayFromAnalyte()") tryCatch( diff --git a/R/rampReactionQueries.R b/R/rampReactionQueries.R index 9db883b4..a82c9a96 100644 --- a/R/rampReactionQueries.R +++ b/R/rampReactionQueries.R @@ -12,8 +12,6 @@ #' #' @return a list of reaction information on each input analyte, separate data.frame for metabolites, genes, and common reactions #' @export -#' -#' @examples getReactionsForAnalytes <- function(analytes, analyteType='metabolites', namesOrIds='ids', onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) { genes <- data.frame() @@ -129,8 +127,6 @@ getReactionsForAnalytes <- function(analytes, analyteType='metabolites', namesOr #' @param includeTransportRxns if TRUE, returns metabolic and transport reactions #' #' @return returns a dataframe of reaction information for each ramp compound id -#' -#' @examples getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) { idStr <- listToQueryString(rampCompoundIds) @@ -175,8 +171,6 @@ getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, hum #' @param includeTransportRxns if TRUE, returns metabolic and transport reactions #' #' @return returns a dataframe of reaction information for each ramp compound id -#' -#' @examples getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) { idStr <- listToQueryString(rampGeneIds) @@ -226,8 +220,6 @@ getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtei #' @param analytes list of analyte ids #' #' @return returns a dataframe of ramp analyte source information -#' -#' @examples getRampSourceInfoFromAnalyteIDs <- function(analytes) { analyteStr <- listToQueryString(analytes) @@ -247,8 +239,6 @@ getRampSourceInfoFromAnalyteIDs <- function(analytes) { #' @param analytes list of analytes (can be names or ids) #' #' @return comma separated list of single quoted analyte ids or names -#' -#' @examples listToQueryString <- function(analytes) { analyteStr <- paste0("'", paste0(analytes, collapse = "','"), "'", sep="") return (analyteStr) diff --git a/man/pathwayResultsPlot.Rd b/man/pathwayResultsPlot.Rd index 73f39d76..16019c54 100644 --- a/man/pathwayResultsPlot.Rd +++ b/man/pathwayResultsPlot.Rd @@ -7,8 +7,8 @@ pathwayResultsPlot( pathwaysSig, pval = "FDR", - perc_analyte_overlap = 0.2, - perc_pathway_overlap = 0.2, + perc_analyte_overlap = 0.5, + perc_pathway_overlap = 0.5, min_pathway_tocluster = 3, text_size = 16, sig_cutoff = 0.05, From 8f96dd2f03d53a35a999114dc5ba78dd5963726e Mon Sep 17 00:00:00 2001 From: johnbraisted Date: Thu, 12 Oct 2023 10:14:37 -0400 Subject: [PATCH 5/6] add reaction docs and tests --- man/getCustomPathwayFromAnalyte.Rd | 26 ++++++++ man/getRampSourceInfoFromAnalyteIDs.Rd | 17 +++++ man/getReactionsForAnalytes.Rd | 37 +++++++++++ man/getReactionsForRaMPCompoundIds.Rd | 29 ++++++++ man/getReactionsForRaMPGeneIds.Rd | 29 ++++++++ man/listToQueryString.Rd | 17 +++++ tests/testthat/test-analytes_to_reactions.R | 73 +++++++++++++++++++++ 7 files changed, 228 insertions(+) create mode 100644 man/getCustomPathwayFromAnalyte.Rd create mode 100644 man/getRampSourceInfoFromAnalyteIDs.Rd create mode 100644 man/getReactionsForAnalytes.Rd create mode 100644 man/getReactionsForRaMPCompoundIds.Rd create mode 100644 man/getReactionsForRaMPGeneIds.Rd create mode 100644 man/listToQueryString.Rd create mode 100644 tests/testthat/test-analytes_to_reactions.R diff --git a/man/getCustomPathwayFromAnalyte.Rd b/man/getCustomPathwayFromAnalyte.Rd new file mode 100644 index 00000000..21d95cb6 --- /dev/null +++ b/man/getCustomPathwayFromAnalyte.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ReturnPathwaysEnrich_InputAnalytes.R +\name{getCustomPathwayFromAnalyte} +\alias{getCustomPathwayFromAnalyte} +\title{Reads an excel file containing metabolite and gene annotations and filters pathways +to those pathways represented by the input analytes.} +\usage{ +getCustomPathwayFromAnalyte(analytes, pathways, analyte_type) +} +\arguments{ +\item{analytes}{a vector of analytes (genes or metabolites) that need to be searched} + +\item{pathways}{If "RaMP" (default), use pathway definitions within RaMP-DB. Else, supply path to gmx file containing custom pathway definitions. GMX files are a tab-separated format that contain one analyte set per column, with the name of the set in the first row, and constituent analytes in subsequent rows} + +\item{analyte_type}{"genes" or "metabolites"} +} +\value{ +A pathwaydf compatible with runFisherTest +} +\description{ +Reads an excel file containing metabolite and gene annotations and filters pathways +to those pathways represented by the input analytes. +} +\author{ +Andrew Patt +} diff --git a/man/getRampSourceInfoFromAnalyteIDs.Rd b/man/getRampSourceInfoFromAnalyteIDs.Rd new file mode 100644 index 00000000..8fd3aef7 --- /dev/null +++ b/man/getRampSourceInfoFromAnalyteIDs.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rampReactionQueries.R +\name{getRampSourceInfoFromAnalyteIDs} +\alias{getRampSourceInfoFromAnalyteIDs} +\title{getRampSourceInfoFromAnalyteIDs Utility method to extract source table information from analyte ids} +\usage{ +getRampSourceInfoFromAnalyteIDs(analytes) +} +\arguments{ +\item{analytes}{list of analyte ids} +} +\value{ +returns a dataframe of ramp analyte source information +} +\description{ +getRampSourceInfoFromAnalyteIDs Utility method to extract source table information from analyte ids +} diff --git a/man/getReactionsForAnalytes.Rd b/man/getReactionsForAnalytes.Rd new file mode 100644 index 00000000..3e773e30 --- /dev/null +++ b/man/getReactionsForAnalytes.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rampReactionQueries.R +\name{getReactionsForAnalytes} +\alias{getReactionsForAnalytes} +\title{getReactionsForAnalytes} +\usage{ +getReactionsForAnalytes( + analytes, + analyteType = "metabolites", + namesOrIds = "ids", + onlyHumanMets = F, + humanProtein = F, + includeTransportRxns = F, + rxnDirs = c("UN") +) +} +\arguments{ +\item{analytes}{list of analytes} + +\item{analyteType}{analyte type, 'metabolites' (default), 'genes' or 'both'} + +\item{namesOrIds}{indicates if input analyte list contains identifiers or analyte names} + +\item{onlyHumanMets}{boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)} + +\item{humanProtein}{boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)} + +\item{includeTransportRxns}{if TRUE, returns metabolic and transport reactions} + +\item{rxnDirs}{character vector of length > 1, specifying reaction directions to return c("UN", "LR", "RL", "BD", "ALL"), default = c("UN").} +} +\value{ +a list of reaction information on each input analyte, separate data.frame for metabolites, genes, and common reactions +} +\description{ +getReactionsForAnalytes +} diff --git a/man/getReactionsForRaMPCompoundIds.Rd b/man/getReactionsForRaMPCompoundIds.Rd new file mode 100644 index 00000000..3e4eca19 --- /dev/null +++ b/man/getReactionsForRaMPCompoundIds.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rampReactionQueries.R +\name{getReactionsForRaMPCompoundIds} +\alias{getReactionsForRaMPCompoundIds} +\title{getReactionsForRaMPCompoundIds returns reactions for a collection of ramp compound ids} +\usage{ +getReactionsForRaMPCompoundIds( + rampCompoundIds, + onlyHumanMets = F, + humanProtein = F, + includeTransportRxns = F, + rxnDirs = c("UN") +) +} +\arguments{ +\item{rampCompoundIds}{list of ramp compound ids} + +\item{onlyHumanMets}{boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)} + +\item{humanProtein}{boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)} + +\item{includeTransportRxns}{if TRUE, returns metabolic and transport reactions} +} +\value{ +returns a dataframe of reaction information for each ramp compound id +} +\description{ +getReactionsForRaMPCompoundIds returns reactions for a collection of ramp compound ids +} diff --git a/man/getReactionsForRaMPGeneIds.Rd b/man/getReactionsForRaMPGeneIds.Rd new file mode 100644 index 00000000..828da5d6 --- /dev/null +++ b/man/getReactionsForRaMPGeneIds.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rampReactionQueries.R +\name{getReactionsForRaMPGeneIds} +\alias{getReactionsForRaMPGeneIds} +\title{getReactionsForRaMPGeneIds returns reactions for a collection of ramp compound ids} +\usage{ +getReactionsForRaMPGeneIds( + rampGeneIds, + onlyHumanMets = F, + humanProtein = F, + includeTransportRxns = F, + rxnDirs = c("UN") +) +} +\arguments{ +\item{rampGeneIds}{list of ramp compound ids} + +\item{onlyHumanMets}{boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)} + +\item{humanProtein}{boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)} + +\item{includeTransportRxns}{if TRUE, returns metabolic and transport reactions} +} +\value{ +returns a dataframe of reaction information for each ramp compound id +} +\description{ +getReactionsForRaMPGeneIds returns reactions for a collection of ramp compound ids +} diff --git a/man/listToQueryString.Rd b/man/listToQueryString.Rd new file mode 100644 index 00000000..1dc303bf --- /dev/null +++ b/man/listToQueryString.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rampReactionQueries.R +\name{listToQueryString} +\alias{listToQueryString} +\title{listToQueryString utility method to convert an id list to a comma separate string, with single quoted values.} +\usage{ +listToQueryString(analytes) +} +\arguments{ +\item{analytes}{list of analytes (can be names or ids)} +} +\value{ +comma separated list of single quoted analyte ids or names +} +\description{ +listToQueryString utility method to convert an id list to a comma separate string, with single quoted values. +} diff --git a/tests/testthat/test-analytes_to_reactions.R b/tests/testthat/test-analytes_to_reactions.R new file mode 100644 index 00000000..d3b9db5f --- /dev/null +++ b/tests/testthat/test-analytes_to_reactions.R @@ -0,0 +1,73 @@ +test_that("analytes_to_reactions", { + library(properties) + dbpass <- properties::read.properties('../../dbprops.txt') + pkg.globals <- setConnectionToRaMP(host=dbpass$hostname, dbname=dbpass$dbname, username=dbpass$username, conpass=dbpass$conpass) + assign("pkg.globals", pkg.globals, envir = .GlobalEnv) + + analytes = c("uniprot:P04406", "uniprot:P00338", "uniprot:P11413", "uniprot:Q99798", "uniprot:P08559", + "chebi:15361", "chebi:14314", "chebi:24996") + + rxnResult <- RaMP::getReactionsForAnalytes(analytes=analytes, analyteType="both", humanProtein = T) + + expect_true(ncol(rxnResult[[1]]) == 17, label="met2rxn 17 columns") + + expect_true(nrow(rxnResult[[1]]) > 0, label = "have met2rxn results") + + expect_true(ncol(rxnResult[[2]]) == 15, label="prot2rxn 15 columns") + + expect_true(nrow(rxnResult[[2]]) > 0, label = "have prot2rxn results") + + expect_true(ncol(rxnResult[[3]]) == 12, label="mp_common_rxn 12 columns") + + expect_true(nrow(rxnResult[[3]]) > 0, label = "have mp_common_rxn results") +}) + + +test_that("metabolites_to_reactions", { + library(properties) + dbpass <- properties::read.properties('../../dbprops.txt') + pkg.globals <- setConnectionToRaMP(host=dbpass$hostname, dbname=dbpass$dbname, username=dbpass$username, conpass=dbpass$conpass) + assign("pkg.globals", pkg.globals, envir = .GlobalEnv) + + analytes = c("chebi:15361", "chebi:14314", "chebi:24996") + + rxnResult <- RaMP::getReactionsForAnalytes(analytes=analytes, analyteType="metabolites", humanProtein = T) + + expect_true(ncol(rxnResult[[1]]) == 17, label="met2rxn 17 columns") + + expect_true(nrow(rxnResult[[1]]) > 0, label = "have met2rxn results") + + expect_true(ncol(rxnResult[[2]]) == 0, label="empty prot2rxn 15 columns") + + expect_true(nrow(rxnResult[[2]]) == 0, label = "empty have prot2rxn results") + + expect_true(ncol(rxnResult[[3]]) == 0, label="empty mp_common_rxn") + + expect_true(nrow(rxnResult[[3]]) == 0, label = "empty mp_common_rxn") +}) + + +test_that("proteins_to_reactions", { + library(properties) + dbpass <- properties::read.properties('../../dbprops.txt') + pkg.globals <- setConnectionToRaMP(host=dbpass$hostname, dbname=dbpass$dbname, username=dbpass$username, conpass=dbpass$conpass) + assign("pkg.globals", pkg.globals, envir = .GlobalEnv) + + analytes = c("uniprot:P04406", "uniprot:P00338", "uniprot:P11413", "uniprot:Q99798", "uniprot:P08559") + + rxnResult <- RaMP::getReactionsForAnalytes(analytes=analytes, analyteType="both", humanProtein = T) + + expect_true(ncol(rxnResult[[1]]) == 0, label="empty met2rxn") + + expect_true(nrow(rxnResult[[1]]) == 0, label = "empty met2rxn") + + expect_true(ncol(rxnResult[[2]]) == 15, label="prot2rxn 15 columns") + + expect_true(nrow(rxnResult[[2]]) > 0, label = "have prot2rxn results") + + expect_true(ncol(rxnResult[[3]]) == 0, label="empty mp_common_rxn") + + expect_true(nrow(rxnResult[[3]]) == 0, label = "empty mp_common_rxn") +}) + + From 8dd8f0ab6a4dd35079166d3290857280669a1f4b Mon Sep 17 00:00:00 2001 From: johnbraisted Date: Thu, 12 Oct 2023 10:17:11 -0400 Subject: [PATCH 6/6] update version to 2.3.3 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0669de47..88c906bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: RaMP Title: RaMP (Relational Database of Metabolomic Pathways) Type: Package -Version: 2.3.2 +Version: 2.3.3 License: GPL-2 Depends: R (>= 3.6.0) Authors@R: c(