Merge pull request #35 from ncats/dev

Dev
ncats · May 4, 2022 · b1fb785 · b1fb785
2 parents e74d21c + 29d65da
commit b1fb785
Show file tree

Hide file tree

Showing 16 changed files with 136 additions and 35 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -31,7 +31,9 @@ Imports:
     stringi,
     RMariaDB,
     DBI,
-    jsonlite
+    jsonlite,
+    methods,
+    tibble
 Encoding: UTF-8
 RoxygenNote: 7.1.2
 Suggests: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -16,6 +16,7 @@ export(getMetabClassTypes)
 export(getOntoFromMeta)
 export(getOntologies)
 export(getPathwayFromAnalyte)
+export(getPathwayNameList)
 export(getPrefixesFromAnalytes)
 export(getRaMPAnalyteIntersections)
 export(pathwayResultsPlot)

diff --git a/R/ReturnAnalytes_InputPathways.R b/R/ReturnAnalytes_InputPathways.R
@@ -2,6 +2,7 @@
 #'
 #' @param pathway a string or a vector of strings that contains pathways of interest
 #' @param analyte_type a string denoting the type of analyte to return ("gene", "metabolite", "both")
+#' @param match type of matching to use, options are "exact" or "fuzzy".  The default is "exact".
 #' @return a data.frame that contains all search results
 #' @examples
 #' \dontrun{
@@ -14,7 +15,7 @@
 #'	"sphingolipid metabolism"))
 #' }
 #' @export
-getAnalyteFromPathway <- function(pathway, analyte_type="both") {
+getAnalyteFromPathway <- function(pathway, match="exact", analyte_type="both") {
   now <- proc.time()
   print("fired")
   if(is.character(pathway)){
@@ -34,21 +35,42 @@ getAnalyteFromPathway <- function(pathway, analyte_type="both") {
   }
   list_pathway <- sapply(list_pathway,shQuote)
   list_pathway <- paste(list_pathway,collapse = ",")
-  # Retrieve pathway RaMP id
-  con <- connectToRaMP()
-  query1 <- paste0("select * from pathway where pathwayName
+
+  # Retrieve pathway RaMP ids
+  if (match=='exact') {
+  	query1 <- paste0("select * from pathway where pathwayName
                    in (",list_pathway,");")
-
-  df1 <- RMariaDB::dbGetQuery(con,query1)
-  RMariaDB::dbDisconnect(con)
+	con <- connectToRaMP()
+	df1 <- RMariaDB::dbGetQuery(con,query1)
+	RMariaDB::dbDisconnect(con)
+  } else if (match=='fuzzy') {
+	print("running fuzzy")
+	df1=c()
+	for (i in 1:length(pathway)) { 
+		# note here that we are using pathway, not list_pathway which 
+		# formats for 'exact' but not 'fuzzy'
+		con <- connectToRaMP()
+		query1 <- paste0('select * from pathway where pathwayName
+                   like "%',pathway[i],'%";')
+		df1 <- rbind(df1,RMariaDB::dbGetQuery(con,query1))
+		RMariaDB::dbDisconnect(con)
+	}
+  } else {
+	stop("Please be sure to set the match parameter to 'exact' or 'fuzzy'.")
+  }
 
   if(nrow(df1)==0) {
     stop("None of the input pathway(s) could be found")}
 
   # Retrieve compound id from RaMP pathway id (query1)
-  query2 <- paste0("select pathwayRampId,rampId from analytehaspathway where
-                   pathwayRampId in (select pathwayRampId from pathway where
-                   pathwayName in (",list_pathway,"));")
+  #query2 <- paste0("select pathwayRampId,rampId from analytehaspathway where
+  #                 pathwayRampId in (select pathwayRampId from pathway where
+  #                 pathwayName in (",list_pathway,"));")
+  pidlist <- sapply(df1$pathwayRampId,shQuote)
+  pidlist <- paste(pidlist,collapse = ",")
+
+  query2 <- paste0("select pathwayRampId, rampId from analytehaspathway 
+	where pathwayRampId in (",pidlist,");")
   con <- connectToRaMP()
   df2 <- RMariaDB::dbGetQuery(con,query2)
   RMariaDB::dbDisconnect(con)

diff --git a/R/ReturnGeneMetab_SameRxn.R b/R/ReturnGeneMetab_SameRxn.R
@@ -16,6 +16,8 @@
 #' }
 #' @export
 rampFastCata <- function(analytes="none", NameOrIds="ids") {
+
+  rampId <- pathwayRampId <- c()
   if(length(analytes)==1){
     if(analytes=="none"){
       stop("Please provide input analytes")}}
@@ -129,7 +131,7 @@ rampFastCata <- function(analytes="none", NameOrIds="ids") {
     } else {
       # default handling of empty result
       # empty df1 requires use of tibble/tidyr add_column
-      df1 <- add_column(df1, 'query_relation'=NA)
+      df1 <- tibble::add_column(df1, 'query_relation'=NA)
       result <- df1
     }
   }

diff --git a/R/ReturnPathwaysEnrich_InputAnalytes.R b/R/ReturnPathwaysEnrich_InputAnalytes.R
@@ -35,6 +35,8 @@ runFisherTest <- function(analytes,
                                      find_synonym=FALSE
   )
 
+  pathwayRampId <- rampId <- c()
+
   if (analyte_type == "metabolites") {
     pathwaydf <- pathwaydf[grep("RAMP_C_", pathwaydf$rampId), ]
   } else if (analyte_type == "genes") {
@@ -46,7 +48,8 @@ runFisherTest <- function(analytes,
     return(NULL)
   }
 
-  if(class(background_type)=="list"){
+#  if(class(background_type)=="list"){
+   if(is(background_type, "list")){
     background = unlist(background)
   }
 
@@ -57,7 +60,7 @@ runFisherTest <- function(analytes,
     )
     print("Custom background specified, genes will be discarded")
   } else if (background_type=="file") {
-    userbkg <- read.table(background, header=F)[,1]
+    userbkg <- utils::read.table(background, header=F)[,1]
     backgrounddf <- getPathwayFromAnalyte(userbkg,
                                           includeRaMPids = TRUE,
                                           NameOrIds = NameOrIds)
@@ -702,6 +705,8 @@ getPathwayFromAnalyte <- function(analytes = "none",
                                   NameOrIds = "ids",
                                   includeRaMPids = FALSE) {
 
+  rampId <- pathwayRampId <- c()
+
   print("Starting getPathwayFromAnalyte()")
   if (is.null(analytes) || length(analytes) == 0) {
     warning("Input analyte list is NULL or empty. Aborting getPathwayFromAnalyte()")

diff --git a/R/SourceDataFunctions.R b/R/SourceDataFunctions.R
@@ -120,3 +120,22 @@ getRaMPAnalyteIntersections<-function(analyteType='metabolites', format='json',
 
 
 
+#' Retrieve the list of pathway names stored in the `pathway` table
+#' @return vector of unique pathway names (alphabetically ordered)
+#' @examples
+#' \dontrun{
+#' pkg.globals <- setConnectionToRaMP(dbname="ramp2",username="root",conpass="",host = "localhost")
+#' getPathwayNameList()
+#' }
+#' @export
+getPathwayNameList <- function(){
+  con<-connectToRaMP()
+  on.exit(RMariaDB::dbDisconnect(con), add = TRUE)  # release the connection even if the query below errors
+  query1<-"select pathwayName from pathway;"
+  results<-RMariaDB::dbGetQuery(con,query1)
+  return(sort(unique(results$pathwayName)))
+}
+
+
+
+
diff --git a/R/printingFunctions.R b/R/printingFunctions.R
@@ -7,7 +7,8 @@ cleanup<- function(data, show_n_rows = 6) {
   if (class(data) != "data.frame" & (class(data) != "list" & length(data) != 1)) {
     stop("Input should be a dataframe resulting from runCombinedFishersTest, getAnalyteFromPathway, getPathwayFromAnalyte, chemicalClassSurvey, or getChemicalProperties")
   }
-  if (class(data) == "list") {
+#  if (class(data) == "list") {
+   if (is(data, "list")){
     data <- data[[1]]
   }
   rownames(data) <- NULL

diff --git a/R/rampChemClassQueries.R b/R/rampChemClassQueries.R
@@ -67,11 +67,13 @@
 #'}
 #' @export
 chemicalClassSurvey <- function(mets, background = "database", background_type="database", includeRaMPids = FALSE){
+
+
   conn <- connectToRaMP()
   print("Starting Chemical Class Survey")
 
   if(background_type == "file") {
-    bkgrnd <- read.table(background, header=F)[,1]
+    bkgrnd <- utils::read.table(background, header=F)[,1]
 
     filteredMets <- mets[mets %in% bkgrnd]
     print(paste0("Number of input query ids: ",length(mets)))

diff --git a/R/rampQueryHelper.R b/R/rampQueryHelper.R
@@ -711,7 +711,8 @@ FilterFishersResults <- function(fishers_df, pval_type = 'fdr', pval_cutoff = 0.
 
     for(result in names(fishers_df)) {
 
-      if(class(fishers_df[[result]]) == 'data.frame') {
+      #if(class(fishers_df[[result]]) == 'data.frame') {
+       if(is(fishers_df[[result]], 'data.frame')) {
         print(result)
         resultDf <- fishers_df[[result]]
         resultDf <- subset(resultDf, resultDf[[criteriaCol]] <= pval_cutoff)

diff --git a/R/writingFunctions.R b/R/writingFunctions.R
@@ -4,14 +4,14 @@
 #' @param outputfile name of output file
 #' @export
 writePathwaysToCSV <- function(mypathways = "none", outputfile = "none") {
-    if(length(mypathways) == 1){
-        if (mypathways == "") {
-            stop("Be sure to specify the output of the function getPathwayFromAnalyte() and an output file")
-        }}
-    if(length(outputfile) == 1){
-        if (outputfile == "") {
-            stop("Be sure to specify the output of the function getPathwayFromAnalyte() and an output file")
-  }}
+  if(length(mypathways) == 1){
+    if (mypathways == "") {
+      stop("Be sure to specify the output of the function getPathwayFromAnalyte() and an output file")
+    }}
+  if(length(outputfile) == 1){
+    if (outputfile == "") {
+      stop("Be sure to specify the output of the function getPathwayFromAnalyte() and an output file")
+    }}
   if (!all(c(
     "pathwayName", "pathwaySource",
     "pathwayId", "inputId", "commonName"
@@ -34,10 +34,10 @@ writePathwaysToCSV <- function(mypathways = "none", outputfile = "none") {
 #' @export
 write_FishersResults <- function(fishResults = "none", outputfile = "none", rampid = FALSE) {
 
-    if(length(fishResults) == 1){
-        if (fishResults == "") {
-    stop("Be sure to specify the output of the function findCluster()")
-  }}
+  if(length(fishResults) == 1){
+    if (fishResults == "") {
+      stop("Be sure to specify the output of the function findCluster()")
+    }}
   clusters <- fishResults$cluster_list
   if (is.null(clusters)) {
     out <- fishResults$fishresults

diff --git a/README.md b/README.md
@@ -57,7 +57,7 @@ To access, [click here](https://www.mdpi.com/2218-1989/8/1/16)
 ## Installation Instructions
 In order to use this R package locally, you will need the following:
 * The R code under this repo
-* The mysql dump file that contains the RaMP database (in the folder inst/extdata/)
+* The mysql dump file that contains the RaMP database. **[Download here](https://figshare.com/ndownloader/files/34990387).**
 
 If you would like to know how to build RaMP database from scratch, please check another GitHub site at [RaMP-BackEnd](https://github.com/ncats/RaMP-BackEnd)
 
@@ -84,11 +84,12 @@ mysql> exit;
 
 Here, we are naming the database "ramp" but you can use any name you'd like.  It is worth noting though that the R package assumes that the name of the database is "ramp" by default.  So if you change the name, remember to pass that name as arguments in the R package functions.
 
-Second, download and unzip the latest RaMP database from the inst/extdata folder.
+Second, download and unzip the latest RaMP database. **[Download here](https://figshare.com/ndownloader/files/34990387).**
 
-Third, populate the named database with the mysql dump file (which you can get from  inst/extdata/rampXXXXXX.sql, where XXXXXX denotes the latest date):
+Third, populate the named database with the mysql dump file.
+Supply the path and file name to the unzipped sql file that you've downloaded.
 ```
-> mysql -u root -p ramp < rampXXXX.sql  
+> mysql -u root -p ramp < /your/file/path/here/ramp_<current_version_id_here>.sql  
 ```
 
 You're done!

diff --git a/inst/extdata/ramp_2.0.6_20220303.sql.gz b/inst/extdata/ramp_2.0.6_20220303.sql.gz
diff --git a/man/getAnalyteFromPathway.Rd b/man/getAnalyteFromPathway.Rd
diff --git a/man/getPathwayNameList.Rd b/man/getPathwayNameList.Rd
diff --git a/tests/testthat/test-chemicalClassEnrichment.R b/tests/testthat/test-chemicalClassEnrichment.R
@@ -8,7 +8,7 @@ test_that("chemical class enrichment data is returned correctly, ChemicalClassEn
                               'hmdb:HMDB0000439',
                               'hmdb:HMDB0000479',
                               'hmdb:HMDB0000532',
-                              'hmdb:HMDB0011211' )
+                              'hmdb:HMDB0011211')
 
 
 

diff --git a/tests/testthat/test-getAnalyteFromPathway.R b/tests/testthat/test-getAnalyteFromPathway.R
@@ -41,7 +41,30 @@ test_that("Table returned shows correct output for multiple pathways ,getAnalyte
 
 
 
+test_that("Fuzzy match test for TCA and Creatine",
+          {
+
+            library(properties)
+            dbpass <- properties::read.properties('../../dbprops.txt')  # connection settings (hostname, dbname, username, conpass) are read from this file
+
+            pkg.globals <- setConnectionToRaMP(host=dbpass$hostname, dbname=dbpass$dbname, username=dbpass$username, conpass=dbpass$conpass)
+            assign("pkg.globals", pkg.globals, envir = .GlobalEnv)  # NOTE(review): presumably the package functions look up pkg.globals globally - confirm
 
+            my_analytes <-
+              getAnalyteFromPathway(pathway=c(
+                "TCA",
+                "Creatine"
+              ), match="fuzzy")  # query terms are passed with match="fuzzy" rather than the default "exact"
+
+            print(dim(my_analytes))  # diagnostic output only; not asserted on
+            print(unique(my_analytes$pathwayName))  # diagnostic output only; not asserted on
+
+            expect_true(
+              !is.null(my_analytes)
+            )
+            expect_true(
+              NROW(my_analytes) != 0)  # only checks that at least one row came back, not which pathways matched
+          })