From 71dfbef5927dd2bd4fcff357a961011c38e54517 Mon Sep 17 00:00:00 2001
From: johnbraisted <jb212828@gmail.com>
Date: Thu, 7 Sep 2023 19:02:05 -0400
Subject: [PATCH 1/6] copy pw_enrich_dev Fisher optimizations to this patch

---
 R/ReturnPathwaysEnrich_InputAnalytes.R | 48 ++++++++++++++++++--------
 R/plottingFunctions.R                  | 14 +++++---
 2 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/R/ReturnPathwaysEnrich_InputAnalytes.R b/R/ReturnPathwaysEnrich_InputAnalytes.R
index efaca8ba..a0010eca 100644
--- a/R/ReturnPathwaysEnrich_InputAnalytes.R
+++ b/R/ReturnPathwaysEnrich_InputAnalytes.R
@@ -88,12 +88,22 @@ runFisherTest <- function(analytes,
       if (biospecimen == "Adipose") {
         biospecimen <- "Adipose tissue"
       }
+
+      # we don't need all fields from all tables joined
       # Get metabolites that belong to a specific biospecimen
+      # query <- paste0(
+      #   "SELECT analytehasontology.*, ontology.*, analytehaspathway.* from analytehasontology, ontology, analytehaspathway where ontology.commonName in ('",
+      #   biospecimen,
+      #   "') and ontology.rampOntologyId = analytehasontology.rampOntologyId and analytehasontology.rampCompoundId = analytehaspathway.rampId"
+      # )
+
+      # less data pull back
       query <- paste0(
-        "SELECT analytehasontology.*, ontology.*, analytehaspathway.* from analytehasontology, ontology, analytehaspathway where ontology.commonName in ('",
+        "SELECT analytehaspathway.* from analytehasontology, ontology, analytehaspathway where ontology.commonName in ('",
         biospecimen,
-        "') and ontology.rampOntologyId = analytehasontology.rampOntologyId and analytehasontology.rampCompoundId = analytehaspathway.rampId"
+        "') and analytehasontology.rampOntologyId = ontology.rampOntologyId and analytehasontology.rampCompoundId = analytehaspathway.rampId"
       )
+
       con <- connectToRaMP()
       backgrounddf <- RMariaDB::dbGetQuery(con, query)
       RMariaDB::dbDisconnect(con)
@@ -152,7 +162,7 @@ runFisherTest <- function(analytes,
 
   # Get the total number of metabolites that are mapped to pathways in RaMP (that's the default background)
   # added conditional to not pull hmdb ids
-  query <- "select * from analytehaspathway where pathwaySource != 'hmdb';"
+  query <- "select distinct rampId, pathwaySource from analytehaspathway where pathwaySource != 'hmdb';"
   con <- connectToRaMP()
   allids <- RMariaDB::dbGetQuery(con, query)
 
@@ -204,9 +214,13 @@ runFisherTest <- function(analytes,
   # Loop through each pathway, build the contingency table, and calculate Fisher's Exact
   # test p-value
   pval <- totinpath <- userinpath <- pidused <- c()
+
+  pidCount = 0
+
   for (i in pid) {
     ids_inpath <- pathwaydf[which(pathwaydf$pathwayRampId == i), "rampId"]
 
+    pidCount = pidCount + 1
 
     if (analyte_type == "metabolites") {
       # Check to make sure that this pathway does have metabolites
@@ -219,13 +233,15 @@ runFisherTest <- function(analytes,
           bg_in_pathway <- length(unique(grep("RAMP_C", ids_inpath_bg, value = TRUE)))
         }
       }
-      inputkegg <- segregated_id_list[[1]][1][[1]]
-      inputreact <- segregated_id_list[[1]][2][[1]]
-      inputwiki <- segregated_id_list[[1]][3][[1]]
-      inputcustom <- segregated_id_list[[1]][[4]]
-      tot_user_analytes <- length(grep("RAMP_C", unique(pathwaydf$rampId)))
-      if (background_type != "database") {
-        tot_bg_analytes <- length(grep("RAMP_C", unique(backgrounddf$rampId)))
+      if(pidCount == 1) {
+        inputkegg <- segregated_id_list[[1]][1][[1]]
+        inputreact <- segregated_id_list[[1]][2][[1]]
+        inputwiki <- segregated_id_list[[1]][3][[1]]
+        inputcustom <- segregated_id_list[[1]][[4]]
+        tot_user_analytes <- length(grep("RAMP_C", unique(pathwaydf$rampId)))
+        if (background_type != "database") {
+          tot_bg_analytes <- length(grep("RAMP_C", unique(backgrounddf$rampId)))
+        }
       }
     } else { # if genes
       # Check to make sure that this pathway does have genes
@@ -234,11 +250,13 @@ runFisherTest <- function(analytes,
       } else {
         user_in_pathway <- length(unique(grep("RAMP_G", ids_inpath, value = TRUE)))
       }
-      inputkegg <- segregated_id_list[[2]][1][[1]]
-      inputreact <- segregated_id_list[[2]][2][[1]]
-      inputwiki <- segregated_id_list[[2]][3][[1]]
-      inputcustom <- segregated_id_list[[2]][[4]]
-      tot_user_analytes <- length(grep("RAMP_G", unique(pathwaydf$rampId)))
+      if(pidCount == 1) {
+        inputkegg <- segregated_id_list[[2]][1][[1]]
+        inputreact <- segregated_id_list[[2]][2][[1]]
+        inputwiki <- segregated_id_list[[2]][3][[1]]
+        inputcustom <- segregated_id_list[[2]][[4]]
+        tot_user_analytes <- length(grep("RAMP_G", unique(pathwaydf$rampId)))
+      }
       ## tot_bg_analytes <- length(grep("RAMP_G", unique(backgrounddf$rampId)))
     }
     if ((!is.na(inputkegg$pathwayRampId[1])) && i %in% inputkegg$pathwayRampId) {
diff --git a/R/plottingFunctions.R b/R/plottingFunctions.R
index 751b23f5..58f44de5 100644
--- a/R/plottingFunctions.R
+++ b/R/plottingFunctions.R
@@ -65,15 +65,21 @@ plotCataNetwork <- function(catalyzedf = "") {
 #' @param sig_cutoff Aesthetic, shows pvalue cutoff for significant pathways
 #' @param interactive If TRUE, return interactive plotly object instead of ggplot object
 #' @export
-pathwayResultsPlot <- function(pathwaysSig, pval = "FDR", perc_analyte_overlap = 0.2,
-                                 perc_pathway_overlap = 0.2, min_pathway_tocluster = 3,
+pathwayResultsPlot <- function(pathwaysSig, pval = "FDR", perc_analyte_overlap = 0.5,
+                                 perc_pathway_overlap = 0.5, min_pathway_tocluster = 3,
                                  text_size = 16, sig_cutoff = 0.05, interactive=FALSE) {
+
+  if( !('cluster_assignment' %in% colnames(pathwaysSig$fishresult))) {
     fishClustering <- findCluster(pathwaysSig,
                                   perc_analyte_overlap = perc_analyte_overlap,
                                   perc_pathway_overlap = perc_pathway_overlap,
                                   min_pathway_tocluster = min_pathway_tocluster
-                                  )
-  fishresult <- fishClustering$fishresults
+    )
+    fishresult <- fishClustering$fishresults
+  } else {
+    fishresult <- pathwaysSig$fishresults
+  }
+
   if (pathwaysSig$analyte_type == "genes" | pathwaysSig$analyte_type == "metabolites") {
     inPath <- fishresult$Num_In_Path
     totPath <- fishresult$Total_In_Path

From 8d26ebf43e3c053bbcde0a35369ff6646c8e7e98 Mon Sep 17 00:00:00 2001
From: johnbraisted <jb212828@gmail.com>
Date: Tue, 26 Sep 2023 17:19:56 -0400
Subject: [PATCH 2/6] commit reaction queries

---
 R/rampReactionQueries.R | 251 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 251 insertions(+)
 create mode 100644 R/rampReactionQueries.R

diff --git a/R/rampReactionQueries.R b/R/rampReactionQueries.R
new file mode 100644
index 00000000..377a357e
--- /dev/null
+++ b/R/rampReactionQueries.R
@@ -0,0 +1,251 @@
+# RaMP Reaction Queries
+
+#' getReactionsForAnalytes
+#'
+#' @param analytes list of analytes
+#' @param analyteType analyte type, 'metabolites' (default), 'genes' or 'both'
+#' @param namesOrIds indicates if input analyte list contains identifiers or analyte names
+#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
+#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
+#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
+#' @param rxnDirs character vector of length > 1, specifying reaction directions to return  c("UN", "LR", "RL", "BD", "ALL"), default = c("UN").
+#'
+#' @return a list of reaction information on each input analyte, separate data.frame for metabolites, genes, and common reactions
+#' @export
+#'
+#' @examples
+getReactionsForAnalytes <- function(analytes, analyteType='metabolites', namesOrIds='ids', onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {
+
+  genes <- data.frame()
+  metabolites <- data.frame()
+  mdf <- data.frame()
+  gdf <- data.frame()
+  if(namesOrIds == 'ids') {
+    analyteSourceInfo <- getRampSourceInfoFromAnalyteIDs(analytes)
+    resultGenes <- analyteSourceInfo[analyteSourceInfo$geneOrCompound == 'gene',]
+    resultMets <- analyteSourceInfo[analyteSourceInfo$geneOrCompound == 'compound',]
+
+    if(!is.null(resultMets) && nrow(resultMets)>0) {
+
+      print("Retrieving reactions for compounds")
+
+      mets <- resultMets[,c('sourceId', 'rampId')]
+      rampIds <- unique(unlist(mets$rampId))
+      mdf <- getReactionsForRaMPCompoundIds(rampCompoundIds=rampIds, onlyHumanMets=onlyHumanMets, humanProtein=humanProtein, includeTransportRxns=includeTransportRxns, rxnDirs=rxnDirs)
+      if(nrow(mdf) > 0) {
+        mdf <- merge(mets, mdf, by.x='rampId', by.y='ramp_cmpd_id')
+      }
+    }
+
+    if(!is.null(resultGenes) && nrow(resultGenes)>0) {
+
+      print("Retrieving reactions for genes/proteins")
+
+      genes <- resultGenes[,c('sourceId', 'rampId')]
+      rampIds <- unique(unlist(genes$rampId))
+      gdf <- getReactionsForRaMPGeneIds(rampGeneIds=rampIds, onlyHumanMets=onlyHumanMets, humanProtein=humanProtein, includeTransportRxns)
+      if(nrow(gdf) > 0) {
+        gdf <- merge(genes, gdf, by.x='rampId', by.y='ramp_gene_id')
+      }
+    }
+
+    # if(!is.null(resultMets) && ncol(resultMets)>0) {
+    #   metatabolites <- resultMets[,c('sourceId', 'rampId')]
+    # }
+
+  } else { # query on names
+    # do we have a helper function to get ramp ids from names?
+
+  }
+
+  resultList <- list()
+  resultList[['met2rxn']] <- mdf
+  resultList[['prot2rxn']] <- gdf
+  resultList[['metProteinCommonReactions']] <- data.frame()
+
+  # find common reactions
+  if(nrow(mdf) > 0 && nrow(gdf) > 0) {
+    mRampRxnIds <- unlist(mdf$ramp_rxn_id)
+    gRampRxnIds <- unlist(gdf$ramp_rxn_id)
+    commonRxnIds <- intersect(mRampRxnIds, gRampRxnIds)
+    if(length(commonRxnIds) > 0) {
+      mRxn <- mdf[mdf$ramp_rxn_id %in% commonRxnIds,]
+      gRxn <- gdf[gdf$ramp_rxn_id %in% commonRxnIds,]
+
+      # one row per reaction, include concat mets, concat proteins
+      # probably a more streamlined way to do this...
+      commonRampRxnList <- c()
+      commonRxnList <- c()
+      mRxnSourceIds <- c()
+      mRxnSourceNames <- c()
+      gRxnSourceIds <- c()
+      gRxnSourceName <- c()
+      gRxnUniprot <- c()
+      commonRxnLabel <- c()
+      commonRxnHTMLEq <- c()
+      hasHumanProtein <- c()
+      onlyHumanMets <- c()
+      isTransport <- c()
+      rxnDir <- c()
+      for(rxn in commonRxnIds) {
+        commonRampRxnList <- c(commonRampRxnList, rxn)
+        commonRxnList <- c(commonRxnList, paste0(unique(mRxn$rxn_source_id[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+        mRxnSourceIds <- c(mRxnSourceIds, paste0(unique(mRxn$sourceId[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+        mRxnSourceNames <- c(mRxnSourceNames, paste0(unique(mRxn$met_name[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+
+        gRxnSourceIds <- c(gRxnSourceIds, paste0(unique(gRxn$sourceId[gRxn$ramp_rxn_id == rxn]), collapse = ','))
+        gRxnUniprot <- c(gRxnUniprot, paste0(unique(gRxn$uniprot[gRxn$ramp_rxn_id == rxn]), collapse = ','))
+        gRxnSourceName <- c(gRxnSourceName, paste0(unique(gRxn$protein_name[gRxn$ramp_rxn_id == rxn]), collapse = ','))
+
+        rxnDir <- c(rxnDir, paste0(unique(mRxn$direction[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+
+        commonRxnLabel <- c(commonRxnLabel, paste0(unique(mRxn$label[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+        commonRxnHTMLEq <- c(commonRxnHTMLEq, paste0(unique(mRxn$html_equation[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+
+        isTransport <- c(isTransport, paste0(unique(mRxn$is_transport[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+        hasHumanProtein <- c(hasHumanProtein, paste0(unique(mRxn$has_human_prot[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+        onlyHumanMets <- c(onlyHumanMets, paste0(unique(mRxn$only_human_mets[mRxn$ramp_rxn_id == rxn]), collapse = ','))
+      }
+      commonReactions <- data.frame(list('metabolites' = mRxnSourceIds, 'met_names' = mRxnSourceNames ,'genes' = gRxnSourceIds,
+                                         'uniprot' = gRxnUniprot, 'proteinNames' = gRxnSourceName,
+                                         'reactionId' = commonRxnList, 'rxn_dir' = rxnDir,'rxn_label' = commonRxnLabel,
+                                         'rxn_html_label' = commonRxnHTMLEq,
+                                         'is_transport' = isTransport,
+                                         'has_human_protein' = hasHumanProtein,
+                                         'only_human_mets' = onlyHumanMets))
+
+      resultList[['metProteinCommonReactions']] <- commonReactions
+    }
+  }
+
+  return(resultList)
+}
+
+#' getReactionsForRaMPCompoundIds returns reactions for a collection of ramp compound ids
+#'
+#' @param rampCompoundIds list of ramp compound ids
+#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
+#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
+#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
+#'
+#' @return returns a dataframe of reaction information for each ramp compound id
+#'
+#' @examples
+getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {
+
+  idStr <- listToQueryString(rampCompoundIds)
+  query <- paste0("select mr.ramp_rxn_id, mr.ramp_cmpd_id, mr.met_source_id, mr.substrate_product, mr.is_cofactor, mr.met_name,
+  mr.ramp_rxn_id, rxn.rxn_source_id, rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation, rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets
+  from reaction2met mr, reaction rxn
+  where mr.ramp_cmpd_id in (",idStr,") and rxn.ramp_rxn_id = mr.ramp_rxn_id")
+
+  if(length(rxnDirs) == 1) {
+    query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'")
+  } else if(length(rxnDirs)>1) {
+    query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")")
+  } else {
+    print("rxnDirs must be of length > 0")
+  }
+
+  if(humanProtein) {
+    query <- paste0(query," and rxn.has_human_prot = 1")
+  }
+
+  if(onlyHumanMets) {
+    query <- paste0(query, " and rxn.only_human_mets = 1")
+  }
+
+  if(!includeTransportRxns) {
+    query <- paste0(query, " and rxn.is_transport = 0")
+  }
+
+  df <- RaMP::runQuery(query)
+
+  return(df)
+}
+
+
+
+#' getReactionsForRaMPGeneIds returns reactions for a collection of ramp compound ids
+#'
+#' @param rampGeneIds list of ramp compound ids
+#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
+#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
+#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
+#'
+#' @return returns a dataframe of reaction information for each ramp compound id
+#'
+#' @examples
+getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {
+
+  idStr <- listToQueryString(rampGeneIds)
+  query <- paste0("select gr.ramp_rxn_id, gr.ramp_gene_id, gr.uniprot, gr.protein_name,
+  gr.ramp_rxn_id, rxn.rxn_source_id, rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation, rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets
+  from reaction2protein gr, reaction rxn
+  where gr.ramp_gene_id in (",idStr,") and rxn.ramp_rxn_id = gr.ramp_rxn_id")
+
+  if(length(rxnDirs) == 1) {
+    query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'")
+  } else if(length(rxnDirs)>1) {
+    query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")")
+  } else {
+    print("rxnDirs must be of length > 0")
+  }
+
+  if(humanProtein) {
+    query <- paste0(query, " and rxn.has_human_prot = 1")
+  }
+
+  if(onlyHumanMets) {
+    query <- paste0(query, " and rxn.only_human_mets = 1")
+  }
+
+  if(!includeTransportRxns) {
+    query <- paste0(query, " and rxn.is_transport = 0")
+  }
+
+  df <- RaMP::runQuery(query)
+
+  return(df)
+}
+
+
+#####################################################
+###################################
+#
+# general dev note... perhaps the functions below should be moved to rampQueryHelpers
+#
+##
+
+
+
+#' getRampSourceInfoFromAnalyteIDs Utility method to extract source table information from analyte ids
+#'
+#' @param analytes list of analyte ids
+#'
+#' @return returns a dataframe of ramp analyte source information
+#'
+#' @examples
+getRampSourceInfoFromAnalyteIDs <- function(analytes) {
+
+  analyteStr <- listToQueryString(analytes)
+
+  query = paste("select distinct sourceId, rampId, geneOrCompound from source where sourceId in (",analyteStr,")")
+
+  df <- RaMP::runQuery(query)
+
+  return(df)
+}
+
+
+#' listToQueryString utility method to convert an id list to a comma separate string, with single quoted values.
+#'
+#' @param analytes list of analytes (can be names or ids)
+#'
+#' @return comma separated list of single quoted analyte ids or names
+#'
+#' @examples
+listToQueryString <- function(analytes) {
+  analyteStr <- paste0("'", paste0(analytes, collapse = "','"), "'", sep="")
+  return (analyteStr)
+}

From 65e777e6f1fe52120c00c404093230f0f6de1597 Mon Sep 17 00:00:00 2001
From: johnbraisted <jb212828@gmail.com>
Date: Mon, 2 Oct 2023 09:34:12 -0400
Subject: [PATCH 3/6] reaction queries

---
 R/rampReactionQueries.R | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/R/rampReactionQueries.R b/R/rampReactionQueries.R
index 377a357e..9db883b4 100644
--- a/R/rampReactionQueries.R
+++ b/R/rampReactionQueries.R
@@ -159,8 +159,9 @@ getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, hum
     query <- paste0(query, " and rxn.is_transport = 0")
   }
 
-  df <- RaMP::runQuery(query)
-
+  conn <- connectToRaMP()
+  df <- RMariaDB::dbGetQuery(conn,query)
+  RMariaDB::dbDisconnect(conn=conn)
   return(df)
 }
 
@@ -204,8 +205,9 @@ getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtei
     query <- paste0(query, " and rxn.is_transport = 0")
   }
 
-  df <- RaMP::runQuery(query)
-
+  conn <- connectToRaMP()
+  df <- RMariaDB::dbGetQuery(conn,query)
+  RMariaDB::dbDisconnect(conn=conn)
   return(df)
 }
 
@@ -232,7 +234,9 @@ getRampSourceInfoFromAnalyteIDs <- function(analytes) {
 
   query = paste("select distinct sourceId, rampId, geneOrCompound from source where sourceId in (",analyteStr,")")
 
-  df <- RaMP::runQuery(query)
+  conn <- connectToRaMP()
+  df <- RMariaDB::dbGetQuery(conn,query)
+  RMariaDB::dbDisconnect(conn=conn)
 
   return(df)
 }

From c7eada4b8cc0f2fc9929aa849cbe3df5736e3a50 Mon Sep 17 00:00:00 2001
From: johnbraisted <jb212828@gmail.com>
Date: Thu, 12 Oct 2023 09:56:13 -0400
Subject: [PATCH 4/6] add new Rhea reaction queries

---
 NAMESPACE                              |  1 +
 R/ReturnPathwaysEnrich_InputAnalytes.R | 14 ++++++++------
 R/rampReactionQueries.R                | 10 ----------
 man/pathwayResultsPlot.Rd              |  4 ++--
 4 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 322e5923..7d4b2537 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,6 +19,7 @@ export(getPathwayFromAnalyte)
 export(getPathwayNameList)
 export(getPrefixesFromAnalytes)
 export(getRaMPAnalyteIntersections)
+export(getReactionsForAnalytes)
 export(pathwayResultsPlot)
 export(plotCataNetwork)
 export(rampFastCata)
diff --git a/R/ReturnPathwaysEnrich_InputAnalytes.R b/R/ReturnPathwaysEnrich_InputAnalytes.R
index a0010eca..0779e4fb 100644
--- a/R/ReturnPathwaysEnrich_InputAnalytes.R
+++ b/R/ReturnPathwaysEnrich_InputAnalytes.R
@@ -893,12 +893,14 @@ getPathwayFromAnalyte <- function(analytes = "none",
   return(df2)
 }
 
-
-##' @param analytes a vector of analytes (genes or metabolites) that need to be searched
-##' @param pathways If "RaMP" (default), use pathway definitions within RaMP-DB. Else, supply path to gmx file containing custom pathway definitions. GMX files are a tab-separated format that contain one analyte set per column, with the name of the set in the first row, and constituent analytes in subsequent rows
-##' @param analyte_type "genes" or "metabolites"
-##' @return A pathwaydf compatible with runFisherTest
-##' @author Andrew Patt
+#' Reads an excel file containing metabolite and gene annotations and filters pathways
+#' to those pathways represented by the input analytes.
+#'
+#' @param analytes a vector of analytes (genes or metabolites) that need to be searched
+#' @param pathways If "RaMP" (default), use pathway definitions within RaMP-DB. Else, supply path to gmx file containing custom pathway definitions. GMX files are a tab-separated format that contain one analyte set per column, with the name of the set in the first row, and constituent analytes in subsequent rows
+#' @param analyte_type "genes" or "metabolites"
+#' @return A pathwaydf compatible with runFisherTest
+#' @author Andrew Patt
 getCustomPathwayFromAnalyte <- function(analytes, pathways, analyte_type) {
   print("Starting getCustomPathwayFromAnalyte()")
   tryCatch(
diff --git a/R/rampReactionQueries.R b/R/rampReactionQueries.R
index 9db883b4..a82c9a96 100644
--- a/R/rampReactionQueries.R
+++ b/R/rampReactionQueries.R
@@ -12,8 +12,6 @@
 #'
 #' @return a list of reaction information on each input analyte, separate data.frame for metabolites, genes, and common reactions
 #' @export
-#'
-#' @examples
 getReactionsForAnalytes <- function(analytes, analyteType='metabolites', namesOrIds='ids', onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {
 
   genes <- data.frame()
@@ -129,8 +127,6 @@ getReactionsForAnalytes <- function(analytes, analyteType='metabolites', namesOr
 #' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
 #'
 #' @return returns a dataframe of reaction information for each ramp compound id
-#'
-#' @examples
 getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {
 
   idStr <- listToQueryString(rampCompoundIds)
@@ -175,8 +171,6 @@ getReactionsForRaMPCompoundIds <- function(rampCompoundIds, onlyHumanMets=F, hum
 #' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
 #'
 #' @return returns a dataframe of reaction information for each ramp compound id
-#'
-#' @examples
 getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {
 
   idStr <- listToQueryString(rampGeneIds)
@@ -226,8 +220,6 @@ getReactionsForRaMPGeneIds <- function(rampGeneIds, onlyHumanMets=F, humanProtei
 #' @param analytes list of analyte ids
 #'
 #' @return returns a dataframe of ramp analyte source information
-#'
-#' @examples
 getRampSourceInfoFromAnalyteIDs <- function(analytes) {
 
   analyteStr <- listToQueryString(analytes)
@@ -247,8 +239,6 @@ getRampSourceInfoFromAnalyteIDs <- function(analytes) {
 #' @param analytes list of analytes (can be names or ids)
 #'
 #' @return comma separated list of single quoted analyte ids or names
-#'
-#' @examples
 listToQueryString <- function(analytes) {
   analyteStr <- paste0("'", paste0(analytes, collapse = "','"), "'", sep="")
   return (analyteStr)
diff --git a/man/pathwayResultsPlot.Rd b/man/pathwayResultsPlot.Rd
index 73f39d76..16019c54 100644
--- a/man/pathwayResultsPlot.Rd
+++ b/man/pathwayResultsPlot.Rd
@@ -7,8 +7,8 @@
 pathwayResultsPlot(
   pathwaysSig,
   pval = "FDR",
-  perc_analyte_overlap = 0.2,
-  perc_pathway_overlap = 0.2,
+  perc_analyte_overlap = 0.5,
+  perc_pathway_overlap = 0.5,
   min_pathway_tocluster = 3,
   text_size = 16,
   sig_cutoff = 0.05,

From 8f96dd2f03d53a35a999114dc5ba78dd5963726e Mon Sep 17 00:00:00 2001
From: johnbraisted <jb212828@gmail.com>
Date: Thu, 12 Oct 2023 10:14:37 -0400
Subject: [PATCH 5/6] add reaction docs and tests

---
 man/getCustomPathwayFromAnalyte.Rd          | 26 ++++++++
 man/getRampSourceInfoFromAnalyteIDs.Rd      | 17 +++++
 man/getReactionsForAnalytes.Rd              | 37 +++++++++++
 man/getReactionsForRaMPCompoundIds.Rd       | 29 ++++++++
 man/getReactionsForRaMPGeneIds.Rd           | 29 ++++++++
 man/listToQueryString.Rd                    | 17 +++++
 tests/testthat/test-analytes_to_reactions.R | 73 +++++++++++++++++++++
 7 files changed, 228 insertions(+)
 create mode 100644 man/getCustomPathwayFromAnalyte.Rd
 create mode 100644 man/getRampSourceInfoFromAnalyteIDs.Rd
 create mode 100644 man/getReactionsForAnalytes.Rd
 create mode 100644 man/getReactionsForRaMPCompoundIds.Rd
 create mode 100644 man/getReactionsForRaMPGeneIds.Rd
 create mode 100644 man/listToQueryString.Rd
 create mode 100644 tests/testthat/test-analytes_to_reactions.R

diff --git a/man/getCustomPathwayFromAnalyte.Rd b/man/getCustomPathwayFromAnalyte.Rd
new file mode 100644
index 00000000..21d95cb6
--- /dev/null
+++ b/man/getCustomPathwayFromAnalyte.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ReturnPathwaysEnrich_InputAnalytes.R
+\name{getCustomPathwayFromAnalyte}
+\alias{getCustomPathwayFromAnalyte}
+\title{Reads an excel file containing metabolite and gene annotations and filters pathways
+to those pathways represented by the input analytes.}
+\usage{
+getCustomPathwayFromAnalyte(analytes, pathways, analyte_type)
+}
+\arguments{
+\item{analytes}{a vector of analytes (genes or metabolites) that need to be searched}
+
+\item{pathways}{If "RaMP" (default), use pathway definitions within RaMP-DB. Else, supply path to gmx file containing custom pathway definitions. GMX files are a tab-separated format that contain one analyte set per column, with the name of the set in the first row, and constituent analytes in subsequent rows}
+
+\item{analyte_type}{"genes" or "metabolites"}
+}
+\value{
+A pathwaydf compatible with runFisherTest
+}
+\description{
+Reads an excel file containing metabolite and gene annotations and filters pathways
+to those pathways represented by the input analytes.
+}
+\author{
+Andrew Patt
+}
diff --git a/man/getRampSourceInfoFromAnalyteIDs.Rd b/man/getRampSourceInfoFromAnalyteIDs.Rd
new file mode 100644
index 00000000..8fd3aef7
--- /dev/null
+++ b/man/getRampSourceInfoFromAnalyteIDs.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rampReactionQueries.R
+\name{getRampSourceInfoFromAnalyteIDs}
+\alias{getRampSourceInfoFromAnalyteIDs}
+\title{getRampSourceInfoFromAnalyteIDs Utility method to extract source table information from analyte ids}
+\usage{
+getRampSourceInfoFromAnalyteIDs(analytes)
+}
+\arguments{
+\item{analytes}{list of analyte ids}
+}
+\value{
+returns a dataframe of ramp analyte source information
+}
+\description{
+getRampSourceInfoFromAnalyteIDs Utility method to extract source table information from analyte ids
+}
diff --git a/man/getReactionsForAnalytes.Rd b/man/getReactionsForAnalytes.Rd
new file mode 100644
index 00000000..3e773e30
--- /dev/null
+++ b/man/getReactionsForAnalytes.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rampReactionQueries.R
+\name{getReactionsForAnalytes}
+\alias{getReactionsForAnalytes}
+\title{getReactionsForAnalytes}
+\usage{
+getReactionsForAnalytes(
+  analytes,
+  analyteType = "metabolites",
+  namesOrIds = "ids",
+  onlyHumanMets = F,
+  humanProtein = F,
+  includeTransportRxns = F,
+  rxnDirs = c("UN")
+)
+}
+\arguments{
+\item{analytes}{list of analytes}
+
+\item{analyteType}{analyte type, 'metabolites' (default), 'genes' or 'both'}
+
+\item{namesOrIds}{indicates if input analyte list contains identifiers or analyte names}
+
+\item{onlyHumanMets}{boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)}
+
+\item{humanProtein}{boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)}
+
+\item{includeTransportRxns}{if TRUE, returns metabolic and transport reactions}
+
+\item{rxnDirs}{character vector of length > 1, specifying reaction directions to return  c("UN", "LR", "RL", "BD", "ALL"), default = c("UN").}
+}
+\value{
+a list of reaction information on each input analyte, separate data.frame for metabolites, genes, and common reactions
+}
+\description{
+getReactionsForAnalytes
+}
diff --git a/man/getReactionsForRaMPCompoundIds.Rd b/man/getReactionsForRaMPCompoundIds.Rd
new file mode 100644
index 00000000..3e4eca19
--- /dev/null
+++ b/man/getReactionsForRaMPCompoundIds.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rampReactionQueries.R
+\name{getReactionsForRaMPCompoundIds}
+\alias{getReactionsForRaMPCompoundIds}
+\title{getReactionsForRaMPCompoundIds returns reactions for a collection of ramp compound ids}
+\usage{
+getReactionsForRaMPCompoundIds(
+  rampCompoundIds,
+  onlyHumanMets = F,
+  humanProtein = F,
+  includeTransportRxns = F,
+  rxnDirs = c("UN")
+)
+}
+\arguments{
+\item{rampCompoundIds}{list of ramp compound ids}
+
+\item{onlyHumanMets}{boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)}
+
+\item{humanProtein}{boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)}
+
+\item{includeTransportRxns}{if TRUE, returns metabolic and transport reactions}
+}
+\value{
+returns a dataframe of reaction information for each ramp compound id
+}
+\description{
+getReactionsForRaMPCompoundIds returns reactions for a collection of ramp compound ids
+}
diff --git a/man/getReactionsForRaMPGeneIds.Rd b/man/getReactionsForRaMPGeneIds.Rd
new file mode 100644
index 00000000..828da5d6
--- /dev/null
+++ b/man/getReactionsForRaMPGeneIds.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rampReactionQueries.R
+\name{getReactionsForRaMPGeneIds}
+\alias{getReactionsForRaMPGeneIds}
+\title{getReactionsForRaMPGeneIds returns reactions for a collection of ramp compound ids}
+\usage{
+getReactionsForRaMPGeneIds(
+  rampGeneIds,
+  onlyHumanMets = F,
+  humanProtein = F,
+  includeTransportRxns = F,
+  rxnDirs = c("UN")
+)
+}
+\arguments{
+\item{rampGeneIds}{list of ramp compound ids}
+
+\item{onlyHumanMets}{boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)}
+
+\item{humanProtein}{boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)}
+
+\item{includeTransportRxns}{if TRUE, returns metabolic and transport reactions}
+}
+\value{
+returns a dataframe of reaction information for each ramp compound id
+}
+\description{
+getReactionsForRaMPGeneIds returns reactions for a collection of ramp compound ids
+}
diff --git a/man/listToQueryString.Rd b/man/listToQueryString.Rd
new file mode 100644
index 00000000..1dc303bf
--- /dev/null
+++ b/man/listToQueryString.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rampReactionQueries.R
+\name{listToQueryString}
+\alias{listToQueryString}
+\title{listToQueryString utility method to convert an id list to a comma separate string, with single quoted values.}
+\usage{
+listToQueryString(analytes)
+}
+\arguments{
+\item{analytes}{list of analytes (can be names or ids)}
+}
+\value{
+comma separated list of single quoted analyte ids or names
+}
+\description{
+listToQueryString utility method to convert an id list to a comma separate string, with single quoted values.
+}
diff --git a/tests/testthat/test-analytes_to_reactions.R b/tests/testthat/test-analytes_to_reactions.R
new file mode 100644
index 00000000..d3b9db5f
--- /dev/null
+++ b/tests/testthat/test-analytes_to_reactions.R
@@ -0,0 +1,73 @@
+test_that("analytes_to_reactions", {
+  library(properties)
+  dbpass <- properties::read.properties('../../dbprops.txt')
+  pkg.globals <- setConnectionToRaMP(host=dbpass$hostname, dbname=dbpass$dbname, username=dbpass$username, conpass=dbpass$conpass)
+  assign("pkg.globals", pkg.globals, envir = .GlobalEnv)
+
+  analytes = c("uniprot:P04406", "uniprot:P00338", "uniprot:P11413", "uniprot:Q99798", "uniprot:P08559",
+               "chebi:15361", "chebi:14314", "chebi:24996")
+
+  rxnResult <- RaMP::getReactionsForAnalytes(analytes=analytes, analyteType="both", humanProtein = T)
+
+  expect_true(ncol(rxnResult[[1]]) == 17, label="met2rxn 17 columns")
+
+  expect_true(nrow(rxnResult[[1]]) > 0, label = "have met2rxn results")
+
+  expect_true(ncol(rxnResult[[2]]) == 15, label="prot2rxn 15 columns")
+
+  expect_true(nrow(rxnResult[[2]]) > 0, label = "have prot2rxn results")
+
+  expect_true(ncol(rxnResult[[3]]) == 12, label="mp_common_rxn 12 columns")
+
+  expect_true(nrow(rxnResult[[3]]) > 0, label = "have mp_common_rxn results")
+})
+
+
+test_that("metabolites_to_reactions", {
+  library(properties)
+  dbpass <- properties::read.properties('../../dbprops.txt')
+  pkg.globals <- setConnectionToRaMP(host=dbpass$hostname, dbname=dbpass$dbname, username=dbpass$username, conpass=dbpass$conpass)
+  assign("pkg.globals", pkg.globals, envir = .GlobalEnv)
+
+  analytes = c("chebi:15361", "chebi:14314", "chebi:24996")
+
+  rxnResult <- RaMP::getReactionsForAnalytes(analytes=analytes, analyteType="metabolites", humanProtein = T)
+
+  expect_true(ncol(rxnResult[[1]]) == 17, label="met2rxn 17 columns")
+
+  expect_true(nrow(rxnResult[[1]]) > 0, label = "have met2rxn results")
+
+  expect_true(ncol(rxnResult[[2]]) == 0, label="empty prot2rxn 15 columns")
+
+  expect_true(nrow(rxnResult[[2]]) == 0, label = "empty have prot2rxn results")
+
+  expect_true(ncol(rxnResult[[3]]) == 0, label="empty mp_common_rxn")
+
+  expect_true(nrow(rxnResult[[3]]) == 0, label = "empty mp_common_rxn")
+})
+
+
+test_that("proteins_to_reactions", {
+  library(properties)
+  dbpass <- properties::read.properties('../../dbprops.txt')
+  pkg.globals <- setConnectionToRaMP(host=dbpass$hostname, dbname=dbpass$dbname, username=dbpass$username, conpass=dbpass$conpass)
+  assign("pkg.globals", pkg.globals, envir = .GlobalEnv)
+
+  analytes = c("uniprot:P04406", "uniprot:P00338", "uniprot:P11413", "uniprot:Q99798", "uniprot:P08559")
+
+  rxnResult <- RaMP::getReactionsForAnalytes(analytes=analytes, analyteType="both", humanProtein = T)
+
+  expect_true(ncol(rxnResult[[1]]) == 0, label="empty met2rxn")
+
+  expect_true(nrow(rxnResult[[1]]) == 0, label = "empty met2rxn")
+
+  expect_true(ncol(rxnResult[[2]]) == 15, label="prot2rxn 15 columns")
+
+  expect_true(nrow(rxnResult[[2]]) > 0, label = "have prot2rxn results")
+
+  expect_true(ncol(rxnResult[[3]]) == 0, label="empty mp_common_rxn")
+
+  expect_true(nrow(rxnResult[[3]]) == 0, label = "empty mp_common_rxn")
+})
+
+

From 8dd8f0ab6a4dd35079166d3290857280669a1f4b Mon Sep 17 00:00:00 2001
From: johnbraisted <jb212828@gmail.com>
Date: Thu, 12 Oct 2023 10:17:11 -0400
Subject: [PATCH 6/6] update version to 2.3.3

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0669de47..88c906bc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RaMP
 Title: RaMP (Relational Database of Metabolomic Pathways)
 Type: Package
-Version: 2.3.2
+Version: 2.3.3
 License: GPL-2
 Depends: R (>= 3.6.0)
 Authors@R: c(