Skip to content

Commit

Permalink
add version parameter to R Package (#46)
Browse files Browse the repository at this point in the history
  • Loading branch information
iblacksand authored May 29, 2024
1 parent 481ea5e commit 379fe46
Show file tree
Hide file tree
Showing 9 changed files with 227 additions and 208 deletions.
146 changes: 76 additions & 70 deletions R/WebGestaltRNta.R
Original file line number Diff line number Diff line change
@@ -1,86 +1,92 @@
#' @importFrom httr POST content
#' @importFrom readr read_tsv cols
#' @importFrom jsonlite toJSON
WebGestaltRNta <- function(organism="hsapiens", network="network_PPI_BIOGRID", method="Network_Retrieval_Prioritization", inputSeed, inputSeedFile, interestGeneType="genesymbol", neighborNum=10, highlightSeedNum=10, sigMethod="fdr", fdrThr=0.05, topThr=10, highlightType="Seeds", outputDirectory=getwd(), projectName=NULL, cache=NULL, hostName="https://www.webgestalt.org/") {
projectDir <- file.path(outputDirectory, paste0("Project_", projectName))
# if (network == "network_FunMap") {
# network <- "network_FunMap_DenseModules"
# }
dir.create(projectDir)
WebGestaltRNta <- function(organism = "hsapiens", network = "network_PPI_BIOGRID", method = "Network_Retrieval_Prioritization", inputSeed, inputSeedFile, interestGeneType = "genesymbol", neighborNum = 10, highlightSeedNum = 10, sigMethod = "fdr", fdrThr = 0.05, topThr = 10, highlightType = "Seeds", outputDirectory = getwd(), projectName = NULL, cache = NULL, hostName = "https://www.webgestalt.org/") {
projectDir <- file.path(outputDirectory, paste0("Project_", projectName))
# if (network == "network_FunMap") {
# network <- "network_FunMap_DenseModules"
# }
dir.create(projectDir)

if (length(network) > 1) {
stop("NTA does not support multiple databases.")
}
inputGene <- formatCheck("list", inputGeneFile=inputSeedFile, inputGene=inputSeed)
# only networks are in gene symbols
# mapping always returns gene symbol, could map to genesymbol but takes two requests
inputGene <- idMappingGene(organism=organism, dataType="list", inputGene=inputGene, sourceIdType=interestGeneType, targetIdType="entrezgene", mappingOutput=FALSE, hostName=hostName)
inputGene <- inputGene$mapped$geneSymbol

if (length(network) > 1) {
stop("NTA does not support multiple databases.")
}
inputGene <- formatCheck("list", inputGeneFile = inputSeedFile, inputGene = inputSeed)
# only networks are in gene symbols
# mapping always returns gene symbol, could map to genesymbol but takes two requests
inputGene <- idMappingGene(organism = organism, dataType = "list", inputGene = inputGene, sourceIdType = interestGeneType, targetIdType = "entrezgene", mappingOutput = FALSE, hostName = hostName)
inputGene <- inputGene$mapped$geneSymbol

if (startsWith(hostName, "file://")) {
dagInfo <- read_tsv(
removeFileProtocol(file.path(hostName, "geneset", paste(organism, "geneontology_Biological_Process", "entrezgene.dag", sep="_"))),
col_names=c("source", "target"), col_types="cc"
)
} else {
geneSetUrl <- file.path(hostName, "api", "geneset")
response <- cacheUrl(geneSetUrl, cache=cache, query=list(organism=organism, database="geneontology_Biological_Process", standardId="entrezgene", fileType="dag"))
dagInfo <- read_tsv(content(response), col_names=c("source", "target"), col_types="cc")
}

## networks <- unlist(strsplit(network, ",", fixed=TRUE))
## May need to bring back analysis of multiple networks
fileName <- paste(projectName, network, method, sep=".")
goEnrichRes <- randomWalkEnrichment(organism=organism, network=network, method=method, highlightSeedNum=highlightSeedNum, inputSeed=inputGene,
sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr, projectDir=projectDir,
topRank=neighborNum, projectName=projectName, cache=cache, hostName=hostName)
if (is.null(goEnrichRes)) {
return(NULL)
}
enrichResFile <- file.path(projectDir, paste0(fileName, "_enrichedResult.txt"))
if (startsWith(hostName, "file://")) {
dagInfo <- read_tsv(
removeFileProtocol(file.path(hostName, "geneset", paste(organism, "geneontology_Biological_Process", "entrezgene.dag", sep = "_"))),
col_names = c("source", "target"), col_types = "cc"
)
} else {
geneSetUrl <- file.path(hostName, "api", "geneset")
response <- cacheUrl(geneSetUrl, cache = cache, query = list(organism = organism, database = "geneontology_Biological_Process", standardId = "entrezgene", fileType = "dag"))
dagInfo <- read_tsv(content(response), col_names = c("source", "target"), col_types = "cc")
}

goTermList <- read_tsv(enrichResFile, col_types=cols())$goId
inputEndIndex <- length(goTermList)
## networks <- unlist(strsplit(network, ",", fixed=TRUE))
## May need to bring back analysis of multiple networks
fileName <- paste(projectName, network, method, sep = ".")
goEnrichRes <- randomWalkEnrichment(
organism = organism, network = network, method = method, highlightSeedNum = highlightSeedNum, inputSeed = inputGene,
sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr, projectDir = projectDir,
topRank = neighborNum, projectName = projectName, cache = cache, hostName = hostName
)
if (is.null(goEnrichRes)) {
return(NULL)
}
enrichResFile <- file.path(projectDir, paste0(fileName, "_enrichedResult.txt"))

dagTree <- expandDag(goTermList, dagInfo)
goTermList <- dagTree$allNodes
edges <- dagTree$edges
rm(dagTree)
goTermList <- read_tsv(enrichResFile, col_types = cols())$goId
inputEndIndex <- length(goTermList)

if (startsWith(hostName, "file://")) {
goId2Term <- read_tsv(
removeFileProtocol(file.path(hostName, "geneset", paste(organism, "geneontology_Biological_Process", "entrezgene.des", sep="_"))),
col_names=c("id", "name"), col_types="cc"
)
} else {
response <- POST(geneSetUrl, body=list(organism=organism, database="geneontology_Biological_Process",
fileType="des", ids=goTermList), encode="json")
goId2Term <- read_tsv(content(response), col_names=c("id", "name"), col_types="cc")
}
dagTree <- expandDag(goTermList, dagInfo)
goTermList <- dagTree$allNodes
edges <- dagTree$edges
rm(dagTree)

jsonFile <- file.path(projectDir, paste0(fileName, ".json"));
jsonData <- vector(mode="list", length=length(goTermList))
if (startsWith(hostName, "file://")) {
goId2Term <- read_tsv(
removeFileProtocol(file.path(hostName, "geneset", paste(organism, "geneontology_Biological_Process", "entrezgene.des", sep = "_"))),
col_names = c("id", "name"), col_types = "cc"
)
} else {
response <- POST(geneSetUrl, body = list(
organism = organism, database = "geneontology_Biological_Process",
fileType = "des", ids = goTermList, version = "2024"
), encode = "json")
goId2Term <- read_tsv(content(response), col_names = c("id", "name"), col_types = "cc")
}

for (i in 1:length(goTermList)) {
goId <- goTermList[[i]]
goName <- filter(goId2Term, .data$id == goId)[[1, "name"]]
dataSets <- i <= inputEndIndex
jsonData[[i]] <- list(data=list(id=goId, name=goName, datasets=dataSets))
}
jsonData <- unname(c(jsonData, edges))
jsonFile <- file.path(projectDir, paste0(fileName, ".json"))
jsonData <- vector(mode = "list", length = length(goTermList))

cat(toJSON(jsonData, auto_unbox=TRUE), "\n", sep="", file=jsonFile)
for (i in 1:length(goTermList)) {
goId <- goTermList[[i]]
goName <- filter(goId2Term, .data$id == goId)[[1, "name"]]
dataSets <- i <= inputEndIndex
jsonData[[i]] <- list(data = list(id = goId, name = goName, datasets = dataSets))
}
jsonData <- unname(c(jsonData, edges))

createNtaReport(networkName=network, method=method, sigMethod=sigMethod, fdrThr=fdrThr, topThr=topThr,
highlightType=highlightType, outputDirectory=outputDirectory, projectDir=projectDir,
projectName=projectName, hostName=hostName)
cat(toJSON(jsonData, auto_unbox = TRUE), "\n", sep = "", file = jsonFile)

cwd <- getwd()
setwd(projectDir)
zip(paste0(projectName, ".zip"), ".", flags="-rq")
setwd(cwd)
createNtaReport(
networkName = network, method = method, sigMethod = sigMethod, fdrThr = fdrThr, topThr = topThr,
highlightType = highlightType, outputDirectory = outputDirectory, projectDir = projectDir,
projectName = projectName, hostName = hostName
)

cat("Results can be found in the ", projectDir, "!\n", sep="")
return(goEnrichRes)
cwd <- getwd()
setwd(projectDir)
zip(paste0(projectName, ".zip"), ".", flags = "-rq")
setwd(cwd)

cat("Results can be found in the ", projectDir, "!\n", sep = "")
return(goEnrichRes)
}
73 changes: 38 additions & 35 deletions R/cacheFile.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
urlToFile <- function(dataUrl) {
result <- sub("^http://", "", dataUrl)
result <- sub("^https://", "", result)
result <- gsub("\\?[^?]+?=", "_", result)
result <- gsub("&[^&]+?=", "_", result)
result <- gsub("[:/.]", "_", result)
result <- gsub("_+", "_", result, fixed=TRUE)
return(result)
result <- sub("^http://", "", dataUrl)
result <- sub("^https://", "", result)
result <- gsub("\\?[^?]+?=", "_", result)
result <- gsub("&[^&]+?=", "_", result)
result <- gsub("[:/.]", "_", result)
result <- gsub("_+", "_", result, fixed = TRUE)
return(result)
}

#' cacheUrl
Expand All @@ -21,32 +21,35 @@ urlToFile <- function(dataUrl) {
#' @importFrom httr GET
#' @keywords internal
#'
cacheUrl <- function(dataUrl, cache=NULL, query=NULL) {
if (!is.null(cache)) {
dir.create(cache, showWarnings=FALSE)
if (!is.null(query)) {
localFilePrefix <- urlToFile(paste0(dataUrl, "_", paste0(query, collapse="_")))
} else {
localFilePrefix <- urlToFile(dataUrl)
}
localFile <- file.path(cache, paste0(localFilePrefix, ".rds"))
}
if (!is.null(cache) && file.exists(localFile)) {
#cat("Reading from cache: ", localFile, "\n")
response <- readRDS(localFile)
} else {
#cat("Reading from server: ", dataUrl, "\n")
if (!is.null(query)) {
response <- GET(dataUrl, query=query)
} else {
response <- GET(dataUrl)
}
if (response$status_code != 200) {
return(response)
}
if (!is.null(cache)) {
saveRDS(response, localFile)
}
}
return(response)
cacheUrl <- function(dataUrl, cache = NULL, query = NULL) {
if (!is.null(cache)) {
dir.create(cache, showWarnings = FALSE)
if (!is.null(query)) {
localFilePrefix <- urlToFile(paste0(dataUrl, "_", paste0(query, collapse = "_")))
} else {
localFilePrefix <- urlToFile(dataUrl)
}
localFile <- file.path(cache, paste0(localFilePrefix, ".rds"))
}
if (!is.null(cache) && file.exists(localFile)) {
# cat("Reading from cache: ", localFile, "\n")
response <- readRDS(localFile)
} else {
# cat("Reading from server: ", dataUrl, "\n")
if (!is.null(query)) {
if (!("version" %in% names(query))) {
query[["version"]] <- "2024"
}
response <- GET(dataUrl, query = query)
} else {
response <- GET(dataUrl, query = list(version = "2024"))
}
if (response$status_code != 200) {
return(response)
}
if (!is.null(cache)) {
saveRDS(response, localFile)
}
}
return(response)
}
2 changes: 1 addition & 1 deletion R/goSlimSummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ goSlimSummary <- function(organism="hsapiens", geneList, outputFile, outputType=
goSlimData <- filter(goSlimData, .data$entrezgene %in% geneList)
} else {
goUrl <- file.path(hostName, "api", "goslim")
response <- POST(goUrl, body=list(organism=organism, ontology=ontology, entrezgenes=geneList), encode="json")
response <- POST(goUrl, body=list(organism=organism, ontology=ontology, entrezgenes=geneList, version="2024"), encode="json")
if (response$status_code != 200) {
stop(webRequestError(response))
}
Expand Down
Loading

0 comments on commit 379fe46

Please sign in to comment.