Skip to content

Commit

Permalink
feat: use correct property for labelling HAL, let users enter property (
Browse files Browse the repository at this point in the history
  • Loading branch information
maelle authored Oct 12, 2023
1 parent c25d68a commit 29d89e0
Show file tree
Hide file tree
Showing 29 changed files with 620 additions and 100 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export(spq_arrange)
export(spq_assemble)
export(spq_control_request)
export(spq_count)
export(spq_endpoint_info)
export(spq_filter)
export(spq_group_by)
export(spq_head)
Expand Down
2 changes: 1 addition & 1 deletion R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ wikidata_url <- function() {
#' \describe{
#' \item{name}{the abbreviated name of the SPARQL endpoint}
#' \item{url}{the full address of the SPARQL endpoint}
#' ...
#' \item{label_property}{the property used for labelling}
#' }
"usual_endpoints"

Expand Down
4 changes: 4 additions & 0 deletions R/glitter-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@
#' @importFrom lifecycle deprecated
## usethis namespace: end
NULL

# Extra reminder(s) for the release checklist.
# NOTE(review): presumably picked up by usethis::use_release_issue() -- confirm.
# Returns a character vector with one element per bullet.
release_bullets <- function() {
  bullets <- "Update articles cf data-raw/articles.R"
  bullets
}
23 changes: 23 additions & 0 deletions R/spq_endpoint_info.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' Create the endpoint info object for `spq_init()`
#'
#' @param label_property Property used by the endpoint for labelling,
#'   as a single non-empty string, e.g. `"skos:prefLabel"`.
#'
#' @return A list of class `glitter_endpoint_info` with a single
#'   `label_property` element, to be used in `spq_init()`'s
#'   `endpoint_info` argument.
#' @export
#'
#' @examples
#' spq_endpoint_info(label_property = "skos:prefLabel")
spq_endpoint_info <- function(label_property = "rdfs:prefLabel") {
  # NOTE(review): the default "rdfs:prefLabel" differs from the "rdfs:label"
  # default used by `spq_init()` and does not look like a standard RDFS term;
  # kept unchanged here to preserve the interface -- confirm intent.

  # TODO check property more
  # The property is later interpolated into a single triple pattern, so it
  # has to be exactly one usable string: reject vectors, NA and "".
  is_string <- is.character(label_property) &&
    length(label_property) == 1L &&
    !is.na(label_property) &&
    nzchar(label_property)

  if (!is_string) {
    cli::cli_abort(
      "Must provide a single non-empty string as {.arg label_property}."
    )
  }

  structure(
    list(label_property = label_property),
    class = "glitter_endpoint_info"
  )
}
22 changes: 19 additions & 3 deletions R/spq_init.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#'
#' @param endpoint Endpoint, either its name if it is in `usual_endpoints`,
#' or a URL
#' @param endpoint_info Information about the endpoint, as returned by
#' [`spq_endpoint_info()`]. Do not use it for a usual endpoint listed in
#' `usual_endpoints`: the information stored there takes precedence.
#' @param request_control An object as returned by [`spq_control_request()`]
#'
#' @return A query object
Expand All @@ -28,22 +31,35 @@ spq_init = function(
max_seconds = getOption("glitter.max_seconds", 120L),
timeout = getOption("glitter.timeout", 1000L),
request_type = c("url", "body-form")
),
endpoint_info = spq_endpoint_info(
label_property = "rdfs:label"
)
) {
if (!inherits(request_control, "glitter_request_control")) {
cli::cli_abort("{.arg request_control} must be created by {.fun spq_control_request}.")
}
if (!inherits(endpoint_info, "glitter_endpoint_info")) {
cli::cli_abort("{.arg endpoint_info} must be created by {.fun spq_endpoint_info}.")
}

# if endpoint passed as name, get url
endpoint = tolower(endpoint)
usual_endpoint_info = usual_endpoints %>%
dplyr::filter(.data$name == endpoint)
endpoint = if (nrow(usual_endpoint_info) > 0) {
dplyr::pull(usual_endpoint_info, .data$url)
if (nrow(usual_endpoint_info) > 0) {
endpoint = dplyr::pull(usual_endpoint_info, .data$url)
label_property = dplyr::pull(usual_endpoint_info, .data$label_property)
} else {
endpoint
label_property = NULL
}

endpoint_info = list(
endpoint_url = endpoint,
label_property = label_property %||% endpoint_info[["label_property"]]
)

query = list(
prefixes_provided = tibble::tibble(name = NULL, url = NULL),
prefixes_used = NULL,
Expand All @@ -56,7 +72,7 @@ spq_init = function(
group_by = NULL,
order_by = NULL,
offset = NULL,
endpoint = endpoint,
endpoint_info = endpoint_info,
request_control = request_control
)

Expand Down
21 changes: 17 additions & 4 deletions R/spq_label.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
#' `spq_select(blop, .overwrite = TRUE)` means you get the label as `blop`,
#' the "original" blop variable isn't returned.
#'
#' @details
#' `spq_label()` uses as labelling property:
#' - the property associated with the endpoint, if it is a usual endpoint
#'   (see `usual_endpoints`);
#' - otherwise, the property indicated via [`spq_endpoint_info()`].
#'
#'
#' @return A query object
#' @export
#'
Expand All @@ -35,6 +41,10 @@ spq_label <- function(.query,
.required = FALSE,
.languages = getOption("glitter.lang", "en$"),
.overwrite = FALSE) {

label_property <- .query[["endpoint_info"]][["label_property"]] %||%
"rdfs:label"

vars = purrr::map_chr(rlang::enquos(...), spq_treat_argument)

if (!is.null(.languages)) .languages = tolower(.languages)
Expand All @@ -43,7 +53,7 @@ spq_label <- function(.query,
vars,
function(query, x) {
if (is.null(.languages)) {
filter = NA
filter = NULL
} else {

languages_filter <- purrr::map_chr(.languages, create_lang_filter, x = x)
Expand All @@ -56,14 +66,17 @@ spq_label <- function(.query,
if (.required) {
q = spq_add(
query,
sprintf("%s rdfs:label %s_labell", x, x),
sprintf("%s %s %s_labell", x, label_property, x),
.required = .required
)
q = spq_filter(q, spq(filter))
if (!is.null(filter)) {
q = spq_filter(q, spq(filter))
}

} else {
q = spq_add(
query,
sprintf("%s rdfs:label %s_labell", x, x),
sprintf("%s %s %s_labell", x,label_property, x),
.required = .required,
.filter = filter
)
Expand Down
2 changes: 1 addition & 1 deletion R/spq_language.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#' @param language the language in which the labels will be provided (defaults to "en" for English). See complete list of Wikimedia language codes [here](https://www.wikidata.org/wiki/Help:Wikimedia_language_codes/lists/all). You can also set language to "auto" and then the Wikidata SPARQL engine will try and detect your language automatically. Specifying several languages will return labels with languages following the priority specified (e.g. with language="fr,en", the label will be returned preferentially in French, or, if there is no French label for the item, in English).
#' @return A query object
#' @export
#' @examples
#' @keywords internal
#' @examples
#' spq_init() %>%
#' spq_add("?film wdt:P31 wd:Q11424") %>%
#' spq_label(film, .languages = c("fr$", "en$")) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/spq_perform.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ spq_perform = function(.query,
"spq_init(endpoint)"
)
} else {
endpoint = .query[["endpoint"]]
endpoint = .query[["endpoint_info"]][["endpoint_url"]]
}


Expand Down
2 changes: 1 addition & 1 deletion R/spq_rename_var.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ spq_rename_var <- function(.query, old, new) {
}

if (question_mark(new) %in% .query[["vars"]][["name"]]) {
if (.query[["vars"]][["renamed"]][.query[["vars"]][["name"]] == question_mark(new)]) {
if (any(.query[["vars"]][["renamed"]][.query[["vars"]][["name"]] == question_mark(new)])) {
.query = spq_rename_var(.query, new, sprintf("%s0", new))
} else {
cli::cli_abort("Can't rename {.field {old}} to {.field {new}} as {.field {new}} already exists.")
Expand Down
3 changes: 3 additions & 0 deletions data-raw/articles.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-knit the pre-computed article: the `.Rmd.orig` source is knitted into
# the `.Rmd` file, with the working directory temporarily set to the
# articles folder.
withr::with_dir(
  "vignettes/articles",
  knitr::knit(
    input = "glitter_bibliometry.Rmd.orig",
    output = "glitter_bibliometry.Rmd"
  )
)
12 changes: 0 additions & 12 deletions data-raw/create_usual_endpoints.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,2 @@
endpoints=tibble::tibble(name=c("wikidata",
"dbpedia",
"databnf",
"isidore",
"hal",
"symogih"),
url=c("https://query.wikidata.org/",
"https://dbpedia.org/sparql",
"https://data.bnf.fr/sparql",
"https://isidore.science/sparql",
"http://sparql.archives-ouvertes.fr/sparql",
"http://bhp-publi.ish-lyon.cnrs.fr:8888/sparql"))
# Build the `usual_endpoints` dataset from its CSV source and store it as
# package data, replacing any previously saved version.
usual_endpoints <- readr::read_csv("data-raw/usual_endpoints.csv")
usethis::use_data(usual_endpoints, overwrite = TRUE)
12 changes: 6 additions & 6 deletions data-raw/usual_endpoints.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name,url
wikidata,https://query.wikidata.org/sparql
dbpedia,https://dbpedia.org/sparql
databnf,https://data.bnf.fr/sparql
isidore,https://isidore.science/sparql
hal,http://sparql.archives-ouvertes.fr/sparql
name,url,label_property
wikidata,https://query.wikidata.org/sparql,rdfs:label
dbpedia,https://dbpedia.org/sparql,rdfs:label
databnf,https://data.bnf.fr/sparql,rdfs:label
isidore,https://isidore.science/sparql,rdfs:label
hal,http://sparql.archives-ouvertes.fr/sparql,skos:prefLabel
Binary file modified data/usual_endpoints.rda
Binary file not shown.
20 changes: 20 additions & 0 deletions man/spq_endpoint_info.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/spq_init.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/spq_label.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/spq_language.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/usual_endpoints.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions tests/testthat/_snaps/spq_label.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,30 @@
}

# spq_label() for not rdfs:label

Code
spq_init(endpoint = "hal") %>% spq_add(
"haldoc:inria-00362381 dcterms:hasVersion ?version") %>% spq_add(
"?version dcterms:type ?type") %>% spq_label(type)
Output
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX haldoc: <https://data.archives-ouvertes.fr/document/>
SELECT ?type (COALESCE(?type_labell,'') AS ?type_label) ?version
WHERE {
haldoc:inria-00362381 dcterms:hasVersion ?version.
?version dcterms:type ?type.
OPTIONAL {
?type skos:prefLabel ?type_labell.
FILTER(lang(?type_labell) IN ('en'))
}
}

# spq_label() .overwrite

Code
Expand Down Expand Up @@ -91,3 +115,22 @@
}

# spq_label() .languages = NULL

Code
spq_init(endpoint = "hal") %>% spq_label(labo, .languages = NULL, .required = TRUE) %>%
spq_add("?labo dcterms:identifier ?labo_id", .required = FALSE) %>%
spq_filter(str_detect(labo_label, "EVS|(UMR 5600)|(Environnement Ville Soc)"))
Output
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?labo (COALESCE(?labo_labell,'') AS ?labo_label) ?labo_id
WHERE {
?labo skos:prefLabel ?labo_labell.
OPTIONAL {?labo dcterms:identifier ?labo_id.}
BIND(COALESCE(?labo_labell,'') AS ?labo_label)
FILTER(REGEX(?labo_label,"EVS|(UMR 5600)|(Environnement Ville Soc)"))
}

18 changes: 18 additions & 0 deletions tests/testthat/test-spq_label.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ test_that("spq_label() works", {
)
})

# Snapshot of the query built by spq_label() on the "hal" endpoint, whose
# labelling property in data-raw/usual_endpoints.csv is skos:prefLabel
# rather than rdfs:label.
test_that("spq_label() for not rdfs:label", {
# The pipeline is left as is: expect_snapshot() records the code alongside
# its output.
expect_snapshot(
spq_init(endpoint = "hal") %>%
spq_add("haldoc:inria-00362381 dcterms:hasVersion ?version") %>%
spq_add("?version dcterms:type ?type") %>%
spq_label(type)
)
})

test_that("spq_label() .overwrite", {

expect_snapshot(
Expand All @@ -47,3 +56,12 @@ test_that("spq_label() .overwrite", {
spq_label(mayor, place, .languages = "en$", .overwrite = TRUE)
)
})

# Snapshot of the query built when spq_label() is called with
# `.languages = NULL` and `.required = TRUE`, then followed by a filter on
# the generated `labo_label` variable.
test_that("spq_label() .languages = NULL", {
# The pipeline is left as is: expect_snapshot() records the code alongside
# its output.
expect_snapshot(
spq_init(endpoint = "hal") %>%
spq_label(labo, .languages = NULL, .required = TRUE) %>%
spq_add("?labo dcterms:identifier ?labo_id", .required = FALSE) %>%
spq_filter(str_detect(labo_label,"EVS|(UMR 5600)|(Environnement Ville Soc)"))
)
})
Binary file added vignettes/articles/figure/docs_LV_recents-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vignettes/articles/figure/interet_LV-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vignettes/articles/figure/interet_LV_run-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vignettes/articles/figure/labo_EVS_filter-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vignettes/articles/figure/orga_LV_prep-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vignettes/articles/figure/orga_LV_run-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vignettes/articles/figure/test_LV-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 29d89e0

Please sign in to comment.