generated from CorrelAid/r-bare-template
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #31 from CorrelAid/zensus
Merge branch 'zensus' into 'dev'
- Loading branch information
Showing
161 changed files
with
12,064 additions
and
9,008 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ | |
^docs$ | ||
^pkgdown$ | ||
^codecov\.yml$ | ||
^cran-comments\.md$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,21 @@ | ||
Package: restatis | ||
Title: R Wrapper for the GENESIS Web Service RESTful API of the German | ||
Title: R Wrapper to access a wide range of Germany's Federal Statistical System | ||
databases based on the GENESIS Web Service RESTful API of the German | ||
Federal Statistical Office (Statistisches Bundesamt/Destatis) | ||
Version: 0.1.0 | ||
Version: 0.2.0 | ||
Authors@R: c( | ||
person("Yannik", "Buhl", role = "aut"), | ||
person("Yannik", "Buhl", , "[email protected]", role = c("aut", "cre")), | ||
person("Zoran", "Kovacevic", role = "aut", | ||
comment = c(ORCID = "0009-0002-0156-0862")), | ||
person("Dorian", "Le Jeune", role = "aut"), | ||
person("Long", "Nguyen", , "[email protected]", role = c("aut", "cre"), | ||
person("Long", "Nguyen", , "[email protected]", role = "aut", | ||
comment = c(ORCID = "0000-0001-8878-7386")), | ||
person("Johannes", "Ritter", role = "aut") | ||
) | ||
Description: A RESTful API wrapper for accessing the GENESIS database of | ||
the German Federal Statistical Office (Destatis). Also supports data | ||
search functions, credential management, result caching, and handling | ||
the German Federal Statistical Office (Destatis) as well as its Census | ||
Database and the database of Germany's regional statistics. Supports data | ||
search functions, credential management, result caching, and handling | ||
remote background jobs for large datasets. | ||
License: MIT + file LICENSE | ||
URL: https://correlaid.github.io/restatis/ | ||
|
@@ -25,16 +27,19 @@ Imports: | |
memoise, | ||
readr, | ||
tibble, | ||
vctrs | ||
vctrs, | ||
purrr | ||
Suggests: | ||
httptest2, | ||
usethis, | ||
knitr, | ||
rmarkdown, | ||
testthat (>= 3.0.0) | ||
testthat (>= 3.0.0), | ||
rvest | ||
VignetteBuilder: | ||
knitr | ||
Config/testthat/edition: 3 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.2.3 | ||
RoxygenNote: 7.3.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
YEAR: 2022 | ||
YEAR: 2024 | ||
COPYRIGHT HOLDER: restatis authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,11 @@ | ||
#' List of EVAS codes | ||
#' | ||
#' @format ## `evas_list_long_20220724` | ||
#' A data frame with 1,097 rows and 3 columns: | ||
#' @format ## `evas_list` | ||
#' A data frame with 1132 rows and 3 columns: | ||
#' \describe{ | ||
#' \item{EVAS}{EVAS code} | ||
#' \item{Beschreibung}{Details on the EVAS code} | ||
#' \item{Titel}{Alternative description of EVAS code contents} | ||
#' ... | ||
#' } | ||
#' @source <https://www.destatis.de/DE/Methoden/Revisionen/Glossar/EVAS.html> | ||
"evas_list_long_20220724" | ||
"evas_list" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,90 +1,131 @@ | ||
#' gen_alternative_terms
#'
#' @description Function to find search terms that are similar or related to
#'   one another in spelling and also represented in the GENESIS, Zensus 2022
#'   or regionalstatistik.de databases. Important note: The API call is
#'   searching for terms with the same characters. To be useful in searching
#'   for related terms it is highly recommended to work with `*` placeholders
#'   (see examples). The placeholder can be placed before and/or after the
#'   search term.
#'
#' @param term Character string. Maximum length of 15 characters. Term or word
#'   for which you are searching for alternative or related terms. Use of `*`
#'   as a placeholder is possible to generate broader search areas.
#' @param similarity Logical. Indicator if the output of the function should be
#'   sorted by the generalized Levenshtein edit distance computed with
#'   \code{adist()}. If not `TRUE`, the terms are sorted by their number of
#'   characters instead. Default is 'TRUE'.
#' @param database Character string. Indicator if the GENESIS ('genesis'),
#'   Zensus 2022 ('zensus') or regionalstatistik.de ('regio') database is
#'   called. Default option is 'all'.
#' @param verbose Logical. Indicator if the output of the function should
#'   include detailed messages and warnings. Default option is 'TRUE'. Set the
#'   parameter to 'FALSE' to suppress additional messages and warnings.
#' @param ... Additional parameters for the API call. These parameters are only
#'   affecting the call itself, no further processing. For more details see
#'   `vignette("additional_parameter")`.
#'
#' @return A list with all retrieved elements from the respective database.
#'   Attributes ("Term", "Database", "Language", "Pagelength", "Copyright") are
#'   added to the list, describing the search configuration for the returned
#'   output.
#' @export
#'
#' @examples
#' \dontrun{
#' # Find terms at GENESIS that are the same (in spelling) to search term "bus"
#' # and sort them by Levenshtein edit distance
#' object <- gen_alternative_terms(term = "bus", similarity = TRUE, database = "genesis")
#'
#' # Find terms at GENESIS that are related (in spelling) to search term "bus"
#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE, database = "genesis")
#'
#' # Find terms at Zensus 2022 that are related (in spelling) to search term "wohn"
#' object <- gen_alternative_terms(term = "wohn*", similarity = TRUE, database = "zensus")
#' }
#'
gen_alternative_terms <- function(term = NULL,
                                  similarity = TRUE,
                                  database = c("all", "genesis", "zensus", "regio"),
                                  verbose = TRUE,
                                  ...) {

  caller <- as.character(match.call()[1])

  # Resolve the requested database(s) to the API function(s) to be called
  gen_fun <- test_database_function(database,
                                    error.input = TRUE,
                                    text = verbose)

  check_function_input(term = term,
                       similarity = similarity,
                       caller = caller,
                       verbose = verbose)

  #-----------------------------------------------------------------------------

  res <- lapply(gen_fun, function(db) {

    # Look up the database name and its credentials once per database
    db_name <- rev_database_function(db)

    if (verbose) {

      info <- paste("Started the processing of", db_name, "database.")

      message(info)

    }

    credentials <- gen_auth_get(database = db_name)

    par_list <- list(endpoint = "catalogue/terms",
                     username = credentials$username,
                     password = credentials$password,
                     selection = term,
                     ...)

    results_raw <- do.call(db, par_list)

    #---------------------------------------------------------------------------

    results_json <- test_if_json(results_raw)

    no_results <- length(results_json$List) == 0

    if (no_results && length(gen_fun) == 1) {

      # A single database was queried: an empty result is an error
      stop("No related terms found for your code.", call. = FALSE)

    } else if (no_results && length(gen_fun) > 1) {

      # Several databases are queried: record the empty result and continue
      termslist <- "No related terms found for your code."

      list_resp <- list("Output" = termslist)

    } else {

      # Extract the term of every list entry and collapse whitespace runs
      termslist <- lapply(results_json$List, function(x) {

        gsub("\\s+", " ", x$Content)

      })

      termslist <- unlist(termslist)

      #-------------------------------------------------------------------------

      if (isTRUE(similarity)) {

        # Sort by generalized Levenshtein edit distance to the search term
        termslist <- termslist[order(utils::adist(term,
                                                  termslist,
                                                  ignore.case = TRUE))]

      } else {

        # Sort by number of characters
        termslist <- termslist[order(nchar(termslist))]

      }

      list_resp <- list("Output" = termslist)

    }

    attr(list_resp, "Term") <- term
    attr(list_resp, "Database") <- db_name
    attr(list_resp, "Language") <- results_json$Parameter$language
    attr(list_resp, "Pagelength") <- results_json$Parameter$pagelength
    attr(list_resp, "Copyright") <- results_json$Copyright

    return(list_resp)

  })

  res <- check_results(res)

  return(res)

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,84 @@ | ||
#' gen_api
#'
#' @description Low-level function to interact with the GENESIS API
#'
#' @param endpoint Character string. The endpoint of the API that is to be queried.
#' @param ... Additional query parameters that are appended to the request URL.
#'
#' @return The result of `httr2::req_perform()` for the assembled request.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_api("helloworld/logincheck") %>%
#'   httr2::resp_body_json()
#' }
#'
gen_api <- function(endpoint, ...) {

  httr2::request("https://www-genesis.destatis.de/genesisWS/rest/2020") %>%
    httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>%
    httr2::req_url_path_append(endpoint) %>%
    # The value returned by gen_auth_get() for the GENESIS database is spliced
    # into the query exactly once, followed by the caller's parameters
    httr2::req_url_query(!!!gen_auth_get(database = "genesis"), ...) %>%
    # At most three attempts are made before giving up
    httr2::req_retry(max_tries = 3) %>%
    httr2::req_perform()

}
|
||
#------------------------------------------------------------------------------- | ||
|
||
#' gen_regio_api
#'
#' @description Low-level request helper for the regionalstatistik.de API.
#'   Builds the request for the given endpoint, adds the values returned by
#'   `gen_auth_get(database = "regio")` plus any extra parameters to the query
#'   string and performs the request.
#'
#' @param endpoint Character string. The endpoint of the API that is to be queried.
#' @param ... Additional query parameters that are appended to the request URL.
#'
#' @return The result of `httr2::req_perform()` for the assembled request.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_regio_api("helloworld/logincheck") %>%
#'   httr2::resp_body_json()
#' }
#'
gen_regio_api <- function(endpoint, ...) {

  # Assemble the request step by step
  regio_request <- httr2::request("https://www.regionalstatistik.de/genesisws/rest/2020/")
  regio_request <- httr2::req_user_agent(regio_request,
                                         "https://github.com/CorrelAid/restatis")
  regio_request <- httr2::req_url_path_append(regio_request, endpoint)
  regio_request <- httr2::req_url_query(regio_request,
                                        !!!gen_auth_get(database = "regio"),
                                        ...)

  # At most three attempts are made before giving up
  regio_request <- httr2::req_retry(regio_request, max_tries = 3)

  httr2::req_perform(regio_request)

}
|
||
#------------------------------------------------------------------------------- | ||
|
||
#' gen_zensus_api
#'
#' @description Low-level request helper for the Zensus 2022 database. Builds
#'   the request for the given endpoint, adds the values returned by
#'   `gen_auth_get(database = "zensus")` plus any extra parameters to the query
#'   string and performs the request.
#'
#' @param endpoint Character string. The endpoint of the API that is to be queried.
#' @param ... Additional query parameters that are appended to the request URL.
#'
#' @return The result of `httr2::req_perform()` for the assembled request.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_zensus_api("helloworld/logincheck") %>%
#'   httr2::resp_body_json()
#' }
#'
gen_zensus_api <- function(endpoint, ...) {

  # Assemble the request step by step
  zensus_request <- httr2::request("https://ergebnisse.zensus2022.de/api/rest/2020")
  zensus_request <- httr2::req_user_agent(zensus_request,
                                          "https://github.com/CorrelAid/restatis")
  zensus_request <- httr2::req_url_path_append(zensus_request, endpoint)
  zensus_request <- httr2::req_url_query(zensus_request,
                                         !!!gen_auth_get(database = "zensus"),
                                         ...)

  # At most three attempts are made before giving up
  zensus_request <- httr2::req_retry(zensus_request, max_tries = 3)

  httr2::req_perform(zensus_request)

}
Oops, something went wrong.