From 68297dddec8a46aa6d1fa5b701c80a04ca5fd6c0 Mon Sep 17 00:00:00 2001
From: "J. Allen Baron"
Date: Thu, 12 Dec 2024 17:45:53 -0500
Subject: [PATCH] Create robot_mquery()

Learned that this may be unnecessary... probably better to update
robot_query() so it's pseudo-vectorized. See
https://github.com/ontodev/robot/issues/1222
---
 NAMESPACE          |   1 +
 R/robot_wrappers.R | 210 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 211 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index 07e7958..0ca4c34 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -123,6 +123,7 @@ export(replace_na)
 export(replace_null)
 export(restore_names)
 export(robot)
+export(robot_mquery)
 export(robot_query)
 export(round_down)
 export(round_up)
diff --git a/R/robot_wrappers.R b/R/robot_wrappers.R
index 429fe26..6a33067 100644
--- a/R/robot_wrappers.R
+++ b/R/robot_wrappers.R
@@ -145,3 +145,213 @@ robot_query <- function(input, query, output = NULL, ...,
     }
     out
 }
+
+
+#' Execute Multiple SPARQL Queries with ROBOT
+#'
+#' Wrapper for `robot("query", ...)` that accepts one `input` but multiple
+#' queries, and optionally multiple output files. ROBOT is executed once per
+#' query, in the order given.
+#'
+#' @param input The path to an RDF/OWL file recognized by ROBOT, as a string.
+#' @param query The text for or path to one or more valid SPARQL queries (`ASK`,
+#' `SELECT`, `CONSTRUCT`, or `UPDATE`) as a character vector.
+#' @param output The path(s) where output will be written or `NA`, as a
+#' character vector with one element per query. If `NULL` or a single `NA`
+#' (default), the results of all queries will be loaded directly; otherwise,
+#' results will be loaded directly for each `NA`. NOTE: `output` is required
+#' for `UPDATE` and `CONSTRUCT` queries.
+#' @param ... Additional arguments to
+#' [ROBOT query](http://robot.obolibrary.org/query) formatted as described in
+#' [DO.utils::robot()]. These are used for every query.
+#' @inheritParams tidy_sparql
+#' @inheritParams readr::read_tsv
+#' @inheritParams robot
+#'
+#' @returns
+#' If `output` is specified for every query, the path(s) to the output file(s)
+#' with the query result(s), as a character vector. Otherwise, a list with one
+#' element per query: the query result or, where `output` was specified, the
+#' path to the output file. ASK results will be boolean, while SELECT results
+#' will be [tibble](tibble::tibble)s. Names of `query`, if any, are preserved.
+#'
+#' @seealso [robot()] for underlying implementation.
+#'
+#' @export
+robot_mquery <- function(input, query, output = NA, ...,
+                         tidy_what = "nothing", col_types = NULL,
+                         .robot_path = NULL) {
+    stopifnot("`input` must be a string" = is_string(input))
+    query_info <- check_query(query)
+    output_info <- check_query_output(output, query_info)
+
+    q_type <- names(query_info)
+    to_load <- names(output_info) == "temp"
+
+    # remove temporary files on exit: queries input as text (written to file by
+    # check_query()) & output of queries with results that are loaded directly
+    tmp_file <- c(
+        setdiff(unname(query_info), query),
+        unname(output_info[to_load])
+    )
+    on.exit(unlink(tmp_file), add = TRUE)
+
+    res <- vector("list", length(query_info))
+    for (idx in seq_along(query_info)) {
+        # handle varying ROBOT parameters for UPDATE queries
+        if (q_type[idx] == "update") {
+            robot(
+                "query",
+                i = input,
+                update = query_info[[idx]],
+                o = output_info[[idx]],
+                ...,
+                .robot_path = .robot_path
+            )
+        } else {
+            # output is unnamed because ROBOT's --query option takes two values:
+            # the query file followed by the output file
+            robot(
+                "query",
+                i = input,
+                query = query_info[[idx]],
+                output_info[[idx]],
+                ...,
+                .robot_path = .robot_path
+            )
+        }
+
+        # handle output
+        if (!to_load[idx]) {
+            res[[idx]] <- output_info[[idx]]
+        } else if (q_type[idx] == "ask") {
+            ask_res <- readr::read_file(output_info[[idx]])
+            res[[idx]] <- switch(ask_res,
+                true = TRUE,
+                false = FALSE,
+                ask_res
+            )
+        } else {
+            sel_res <- readr::read_tsv(
+                output_info[[idx]],
+                col_types = col_types,
+                show_col_types = FALSE
+            )
+            res[[idx]] <- tidy_sparql(sel_res, tidy_what)
+        }
+    }
+
+    if (!any(to_load)) {
+        res <- unlist(res)
+    }
+    names(res) <- names(query)
+    res
+}
+
+
+# robot_mquery helpers -------------------------------------------------------
+
+#' Check Queries
+#'
+#' Check if queries are files or text, write those input as text to file, and
+#' determine query types.
+#'
+#' @inheritParams robot_mquery
+#' @returns Query file paths, as a character vector named by query type
+#' (`"select"`, `"ask"`, `"construct"`, or `"update"`).
+#' @noRd
+check_query <- function(query) {
+    stopifnot(
+        "`query` must be a character vector" = is.character(query),
+        "`query` must include at least one query" = length(query) > 0,
+        "`query` must not include missing values" = !anyNA(query)
+    )
+
+    query_is_file <- file.exists(query)
+
+    # capture query file location, write those input as text (required by ROBOT)
+    out <- unname(query)
+    out[!query_is_file] <- purrr::map_chr(
+        out[!query_is_file],
+        function(.q) {
+            .q_file <- tempfile(fileext = ".sparql")
+            readr::write_lines(.q, .q_file)
+            .q_file
+        }
+    )
+
+    query_txt <- unname(query)
+    query_txt[query_is_file] <- purrr::map_chr(
+        query_txt[query_is_file],
+        readr::read_file
+    )
+
+    # determine type of each query (SELECT = default, but not in search since it
+    # can be used in other queries)
+    q_type <- stringr::str_extract(
+        query_txt,
+        stringr::regex(
+            "\\b(insert|delete|construct|ask)\\b",
+            ignore_case = TRUE
+        )
+    )
+    q_type <- stringr::str_to_lower(q_type)
+    q_type[is.na(q_type)] <- "select"
+    q_type[q_type %in% c("insert", "delete")] <- "update"
+
+    names(out) <- q_type
+    out
+}
+
+#' Check Query Outputs
+#'
+#' Check that `output` is compatible with the queries, including that output
+#' files are specified for queries that require them, and create temporary
+#' output file paths for queries with results that will be loaded directly.
+#'
+#' @inheritParams robot_mquery
+#' @param query_info The output of `check_query()`.
+#' @returns Output file paths, as a character vector with one element per
+#' query, named `"file"` where specified in `output` or `"temp"` where a
+#' temporary file path was created.
+#' @noRd
+check_query_output <- function(output, query_info) {
+    n_query <- length(query_info)
+
+    if (is.null(output) || (length(output) == 1 && is.na(output))) {
+        # results of all queries will be loaded directly
+        output <- rep(NA_character_, n_query)
+    } else if (all(is.na(output))) {
+        # NA vectors are logical by default
+        output <- as.character(output)
+    }
+    if (!is.character(output)) {
+        rlang::abort("`output` must be a character vector, `NA`, or `NULL`")
+    }
+    if (length(output) != n_query) {
+        rlang::abort("`output` must be the same length as `query`")
+    }
+
+    to_load <- is.na(output)
+    q_req_output <- names(query_info) %in% c("update", "construct")
+    if (any(q_req_output & to_load)) {
+        rlang::abort(
+            c(
+                "`output` is required for CONSTRUCT or UPDATE (INSERT/DELETE) `query`",
+                i = paste0(
+                    "Missing for `query` at position(s): ",
+                    paste(which(q_req_output & to_load), collapse = ", ")
+                )
+            )
+        )
+    }
+
+    # capture output file location, using temporary files for results to load
+    out <- unname(output)
+    out[to_load] <- purrr::map_chr(
+        seq_len(sum(to_load)),
+        ~ tempfile(fileext = ".tsv")
+    )
+    names(out) <- ifelse(to_load, "temp", "file")
+    out
+}