Skip to content

Commit

Permalink
Set user agent to "Bioconductor UCSC.utils" when querying UCSC REST API
Browse files Browse the repository at this point in the history
  • Loading branch information
hpages committed Mar 22, 2024
1 parent d91d167 commit 95c7183
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 118 deletions.
11 changes: 10 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,20 @@ biocViews: Infrastructure, GenomeAssembly, Annotation, GenomeAnnotation,
DataImport
URL: https://bioconductor.org/packages/UCSC.utils
BugReports: https://github.com/Bioconductor/UCSC.utils/issues
Version: 0.99.1
Version: 0.99.2
License: Artistic-2.0
Encoding: UTF-8
Authors@R: person("Hervé", "Pagès", role=c("aut", "cre"),
email="[email protected]")
Imports: methods, stats, httr, rjson, S4Vectors
Suggests: DBI, RMariaDB, GenomeInfoDb, testthat, knitr, rmarkdown, BiocStyle
VignetteBuilder: knitr
Collate: 00utils.R
UCSC.api.url.R
REST_API.R
list_UCSC_genomes.R
get_UCSC_chrom_sizes.R
list_UCSC_tracks.R
fetch_UCSC_track_data.R
UCSC_dbselect.R
zzz.R
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import(methods)
importFrom(stats, setNames)
importFrom(httr, GET, content)
importFrom(httr, GET, content, user_agent)
importFrom(rjson, fromJSON)

importFrom(S4Vectors, wmsg, isTRUEorFALSE, isSingleNumber,
Expand Down
115 changes: 115 additions & 0 deletions R/REST_API.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
### =========================================================================
### Thin R wrappers to UCSC REST API endpoints
### -------------------------------------------------------------------------
###
### Nothing in this file is exported.
###


.API_query <- function(endpoint, query=list(), api.url=UCSC.api.url())
{
    ## Validate the arguments, in the same order as they are declared.
    stopifnot(isSingleString(endpoint), nzchar(endpoint))
    stopifnot(is.list(query))
    stopifnot(isSingleString(api.url), nzchar(api.url))
    ## A non-empty 'query' must have names (they are passed to GET() as the
    ## query part of the request).
    if (length(query) > 0L)
        stopifnot(!is.null(names(query)))
    ## Identify ourselves via the user agent of the request.
    agent <- user_agent("Bioconductor UCSC.utils")
    ## The request URL is <api.url>/<endpoint>. The value returned by GET()
    ## is returned as-is to the caller.
    GET(paste(api.url, endpoint, sep="/"), agent, query=query)
}

.parse_json <- function(response)
{
    ## Extract the body of the response as UTF-8 text, then parse it as JSON.
    json_text <- content(response, as="text", encoding="UTF-8")
    parsed_json <- fromJSON(json_text)
    ## Sanity checks: the parsed result must be a list with names.
    stopifnot(is.list(parsed_json), !is.null(names(parsed_json)))
    parsed_json
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### We only support the following endpoints at the moment:
### - /list/ucscGenomes
### - /list/chromosomes
### - /list/tracks
### - /getData/track
### See https://genome.ucsc.edu/goldenPath/help/api.html#Endpoint for the
### full list of endpoints.
###
### All the functions below return parsed JSON.
###

### Endpoint /list/ucscGenomes
### Returns the "ucscGenomes" component of the parsed JSON response.
API_list_genomes <- function(api.url=UCSC.api.url())
{
    resp <- .API_query("list/ucscGenomes", api.url=api.url)
    status <- resp[["status_code"]]
    ## Any status other than 200 is treated as a failure.
    if (status != 200L)
        stop(wmsg("failed to get list of UCSC genomes from ", api.url))

    parsed <- .parse_json(resp)
    ans <- parsed[["ucscGenomes"]]
    ## Sanity check.
    stopifnot(is.list(ans))
    ans
}

### Endpoint /list/chromosomes
### Returns the full parsed JSON response for the supplied genome.
API_list_chromosomes <- function(genome, api.url=UCSC.api.url())
{
    stopifnot(isSingleString(genome), nzchar(genome))

    resp <- .API_query("list/chromosomes",
                       query=list(genome=genome),
                       api.url=api.url)
    status <- resp[["status_code"]]
    ## Any status other than 200 is treated as a failure.
    if (status != 200L)
        stop(wmsg(genome, ": unknown UCSC genome ",
                  "(or ", api.url, " is down?)"))

    ans <- .parse_json(resp)
    ## Sanity check: the "genome" field of the parsed response must be
    ## identical to the genome that was requested.
    stopifnot(identical(ans[["genome"]], genome))
    ans
}

### Endpoint /list/tracks
### Returns the component of the parsed JSON response that is named after
### the supplied genome.
API_list_tracks <- function(genome, api.url=UCSC.api.url())
{
    stopifnot(isSingleString(genome), nzchar(genome))

    resp <- .API_query("list/tracks",
                       query=list(genome=genome),
                       api.url=api.url)
    status <- resp[["status_code"]]
    ## Any status other than 200 is treated as a failure.
    if (status != 200L)
        stop(wmsg(genome, ": unknown UCSC genome ",
                  "(or ", api.url, " is down?)"))

    parsed <- .parse_json(resp)
    ans <- parsed[[genome]]
    ## Sanity check.
    stopifnot(is.list(ans))
    ans
}

### Endpoint /getData/track
### Note that the endpoint expects the supplied 'track' argument to be the
### name of the track's primary table rather than the track's name.
### E.g. "catLiftOffGenesV1" rather than "CAT/Liftoff Genes".
### Returns the full parsed JSON response.
API_get_track_data <- function(genome, primary_table, api.url=UCSC.api.url())
{
    stopifnot(isSingleString(genome), nzchar(genome))
    stopifnot(isSingleString(primary_table), nzchar(primary_table))

    ## The primary table is sent to the endpoint as the 'track' parameter.
    params <- list(genome=genome, track=primary_table)
    resp <- .API_query("getData/track", query=params, api.url=api.url)
    status <- resp[["status_code"]]
    ## Any status other than 200 is treated as a failure.
    if (status != 200L)
        stop(wmsg(genome, "/", primary_table, ": ",
                  "unknown UCSC genome/primary_table ",
                  "(or ", api.url, " is down?)"))

    ans <- .parse_json(resp)
    ## Sanity checks: the "genome" field must be identical to the requested
    ## genome. The "track" field is only checked when it is present.
    stopifnot(identical(ans[["genome"]], genome))
    track_field <- ans[["track"]]
    if (!is.null(track_field))
        stopifnot(identical(ans[["track"]], primary_table))
    ans
}

116 changes: 0 additions & 116 deletions R/UCSC.api.url.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
###


### Exported.
UCSC.api.url <- function(new_url=NULL)
{
ans <- getOption("UCSC.api.url")
Expand All @@ -18,118 +17,3 @@ UCSC.api.url <- function(new_url=NULL)
invisible(ans) # return old URL invisibly
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Helpers for the thin endpoint wrappers defined below
###

### Send a GET request to <api.url>/<endpoint> and return the value of GET().
###   endpoint: single non-empty string, appended to 'api.url' after a "/".
###   query:    list passed to GET() as its 'query' argument; must have
###             names unless it is empty.
###   api.url:  single non-empty string; defaults to UCSC.api.url().
.API_query <- function(endpoint, query=list(), api.url=UCSC.api.url())
{
## Argument checks.
stopifnot(isSingleString(endpoint), nzchar(endpoint),
is.list(query),
isSingleString(api.url), nzchar(api.url))
## Names are only required when 'query' is not empty.
if (length(query) != 0L)
stopifnot(!is.null(names(query)))
url <- paste0(api.url, "/", endpoint)
GET(url, query=query)
}

### Extract the content of 'response' as UTF-8 text, parse it with
### fromJSON(), and return the result.
.parse_json <- function(response)
{
parsed_json <- fromJSON(content(response, as="text", encoding="UTF-8"))
## Sanity checks: the parsed result must be a list with names.
stopifnot(is.list(parsed_json), !is.null(names(parsed_json)))
parsed_json
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Thin wrappers to UCSC REST API endpoints
###
### At the moment, we only support:
### - /list/ucscGenomes
### - /list/chromosomes
### - /list/tracks
### - /getData/track
### See https://genome.ucsc.edu/goldenPath/help/api.html#Endpoint for the
### full list of endpoint functions.
###
### All the functions below return parsed JSON.
### None of them is exported.
###

### Endpoint /list/ucscGenomes
### Returns the "ucscGenomes" component of the parsed JSON response.
### Raises an error if the status code of the response is not 200.
API_list_genomes <- function(api.url=UCSC.api.url())
{
endpoint <- "list/ucscGenomes"
response <- .API_query(endpoint, api.url=api.url)
if (response[["status_code"]] != 200L)
stop(wmsg("failed to get list of UCSC genomes from ", api.url))

ans <- .parse_json(response)[["ucscGenomes"]]
## Sanity check: the extracted component must be a list.
stopifnot(is.list(ans))
ans
}

### Endpoint /list/chromosomes
### 'genome' must be a single non-empty string. Returns the full parsed
### JSON response. Raises an error if the status code of the response is
### not 200.
API_list_chromosomes <- function(genome, api.url=UCSC.api.url())
{
stopifnot(isSingleString(genome), nzchar(genome))

endpoint <- "list/chromosomes"
query <- list(genome=genome)
response <- .API_query(endpoint, query=query, api.url=api.url)
if (response[["status_code"]] != 200L)
stop(wmsg(genome, ": unknown UCSC genome ",
"(or ", api.url, " is down?)"))

ans <- .parse_json(response)
## Sanity check: the "genome" field of the parsed response must be
## identical to the requested genome.
stopifnot(identical(ans[["genome"]], genome))
ans
}

### Endpoint /list/tracks
### 'genome' must be a single non-empty string. Returns the component of
### the parsed JSON response that is named after 'genome'. Raises an error
### if the status code of the response is not 200.
API_list_tracks <- function(genome, api.url=UCSC.api.url())
{
stopifnot(isSingleString(genome), nzchar(genome))

endpoint <- "list/tracks"
query <- list(genome=genome)
response <- .API_query(endpoint, query=query, api.url=api.url)
if (response[["status_code"]] != 200L)
stop(wmsg(genome, ": unknown UCSC genome ",
"(or ", api.url, " is down?)"))

ans <- .parse_json(response)[[genome]]
## Sanity check: the extracted component must be a list.
stopifnot(is.list(ans))
ans
}

### Endpoint /getData/track
### Note that the endpoint expects the supplied 'track' argument to be the
### name of the track's primary table rather than the track's name.
### E.g. "catLiftOffGenesV1" rather than "CAT/Liftoff Genes".
### 'genome' and 'primary_table' must be single non-empty strings. Returns
### the full parsed JSON response. Raises an error if the status code of
### the response is not 200.
API_get_track_data <- function(genome, primary_table, api.url=UCSC.api.url())
{
stopifnot(isSingleString(genome), nzchar(genome),
isSingleString(primary_table), nzchar(primary_table))

endpoint <- "getData/track"
## The primary table is sent as the 'track' query parameter.
query <- list(genome=genome, track=primary_table)
response <- .API_query(endpoint, query=query, api.url=api.url)
if (response[["status_code"]] != 200L)
stop(wmsg(genome, "/", primary_table, ": ",
"unknown UCSC genome/primary_table ",
"(or ", api.url, " is down?)"))

ans <- .parse_json(response)
## Sanity checks: the "genome" field must be identical to the requested
## genome. The "track" field is only checked when it is present.
stopifnot(identical(ans[["genome"]], genome))
if (!is.null(ans[["track"]]))
stopifnot(identical(ans[["track"]], primary_table))
ans
}

0 comments on commit 95c7183

Please sign in to comment.