generated from CorrelAid/r-bare-template
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #32 from CorrelAid/dev
- Loading branch information
Showing
161 changed files
with
13,522 additions
and
8,623 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,4 +10,7 @@ | |
^pkgdown$ | ||
^codecov\.yml$ | ||
^cran-comments\.md$ | ||
<<<<<<< HEAD | ||
======= | ||
^CRAN-SUBMISSION$ | ||
>>>>>>> main |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,22 @@ | ||
Package: restatis | ||
Title: Web API Client for the German Federal Statistical Office Database | ||
Version: 0.1.0 | ||
Title: R Wrapper to Access a Wide Range of Germany's Federal Statistical System | ||
Databases Based on the GENESIS Web Service RESTful API of the German Federal | ||
Statistical Office (Statistisches Bundesamt/Destatis) | ||
Version: 0.2.0 | ||
Authors@R: c( | ||
person("Yannik", "Buhl", , "yannik.buhl@posteo.de", role = "aut"), | ||
person("Zoran", "Kovacevic", , "[email protected]", role = "aut", | ||
person("Yannik", "Buhl", , "ybuhl@posteo.de", role = c("aut", "cre")), | ||
person("Zoran", "Kovacevic", role = "aut", | ||
comment = c(ORCID = "0009-0002-0156-0862")), | ||
person("Dorian", "Le Jeune", , "[email protected]", role = "aut"), | ||
person("Long", "Nguyen", , "[email protected]", role = c("aut", "cre"), | ||
person("Dorian", "Le Jeune", role = "aut"), | ||
person("Long", "Nguyen", , "[email protected]", role = "aut", | ||
comment = c(ORCID = "0000-0001-8878-7386")), | ||
person("Johannes", "Ritter", , "[email protected]", role = "aut") | ||
) | ||
Description: A 'RESTful' API wrapper for accessing the 'GENESIS' database | ||
of the German Federal Statistical Office (Destatis) | ||
<https://www-genesis.destatis.de/>. Also supports data search | ||
functions, credential management, result caching, and handling remote | ||
background jobs for large datasets. | ||
Description: A RESTful API wrapper for accessing the GENESIS database of | ||
the German Federal Statistical Office (Destatis) as well as its Census | ||
Database and the database of Germany's regional statistics. Supports data | ||
search functions, credential management, result caching, and handling | ||
remote background jobs for large datasets. | ||
License: MIT + file LICENSE | ||
URL: https://correlaid.github.io/restatis/, | ||
https://github.com/CorrelAid/restatis | ||
|
@@ -28,14 +30,14 @@ Imports: | |
readr, | ||
stats, | ||
tibble, | ||
tools, | ||
utils, | ||
vctrs | ||
vctrs, | ||
purrr | ||
Suggests: | ||
httptest2, | ||
knitr, | ||
rmarkdown, | ||
testthat (>= 3.0.0), | ||
rvest, | ||
usethis, | ||
withr | ||
VignetteBuilder: | ||
|
@@ -44,4 +46,4 @@ Config/testthat/edition: 3 | |
Encoding: UTF-8 | ||
LazyData: true | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.2.3 | ||
RoxygenNote: 7.3.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
YEAR: 2022 | ||
YEAR: 2024 | ||
COPYRIGHT HOLDER: restatis authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,10 @@ | ||
# restatis 0.2.0 | ||
|
||
* Added support for regionalstatistik.de and the Zensus 2022 database | ||
* Massively improved the handling of 'jobs' (e.g., adding gen_download_job()) | ||
* Added some helper functions such as gen_logincheck() | ||
* Rewrote functions in the background | ||
|
||
# restatis 0.1.0 | ||
|
||
* Initial CRAN submission. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,11 @@ | ||
#' List of EVAS Codes | ||
#' | ||
#' @format ## `evas_list_long_20220724` | ||
#' A data frame with 1,097 rows and 3 columns: | ||
#' @format ## `evas_list` | ||
#' A data frame with 1132 rows and 3 columns: | ||
#' \describe{ | ||
#' \item{EVAS}{EVAS code} | ||
#' \item{Beschreibung}{Details on the EVAS code} | ||
#' \item{Titel}{Alternative description of EVAS code contents} | ||
#' ... | ||
#' \item{Titel}{Alternative description of EVAS code contents} | ||
#' } | ||
#' @source <https://www.destatis.de/DE/Methoden/Revisionen/Glossar/EVAS.html> | ||
"evas_list_long_20220724" | ||
"evas_list" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,78 +1,130 @@ | ||
#' Search for Related Terms Based on the Same Specific String | ||
#' gen_alternative_terms | ||
#' | ||
#' @description Function to find terms in GENESIS that are similar or related based on a simple comparison of strings. This can help to identify alternative search terms. | ||
#' @description Function to find search terms that are similar or related to one another in spelling and that are also represented in the GENESIS, Zensus 2022 or regionalstatistik.de databases. Important note: The API call searches for terms with the same characters. To make it useful for finding related terms, it is highly recommended to work with "*" placeholders (see examples). The placeholder can be placed before and/or after the search term. | ||
#' | ||
#' @param term Character string. Maximum length of 15 characters. Term or word for which alternative or related terms are to be found. The '*' placeholder can be used to broaden the search. | ||
#' @param similarity Logical. Indicator if the output of the function should be sorted based on a Levenshtein edit distance based on the \code{adist()} function. | ||
#' @param ... Additional parameter of the GENESIS API call. These parameters are only affecting the GENESIS API call itself, no further processing. | ||
#' @param similarity Boolean. Indicator if the output of the function should be sorted by Levenshtein edit distance, computed with the \code{adist()} function. Default is 'TRUE'. | ||
#' @param database Character string. Indicator if the GENESIS ('genesis'), Zensus 2022 ('zensus') or regionalstatistik.de ('regio') database is called. Default option is 'all'. | ||
#' @param verbose Boolean. Indicator if the output of the function should include detailed messages and warnings. Default option is 'TRUE'. Set the parameter to 'FALSE' to suppress additional messages and warnings. | ||
#' @param ... Additional parameters for the API call. These parameters are only affecting the call itself, no further processing. For more details see `vignette("additional_parameter")`. | ||
#' | ||
#' @return A list with all recalled elements from GENESIS. Attributes are added to the data.frame, describing the search configuration for the returned output. | ||
#' @return A list with all retrieved elements from the respective database. Attributes are added to the data.frame, describing the search configuration for the returned output. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' # Find terms that are similar (in spelling) to search term "bus" | ||
#' # Find terms at GENESIS that are the same (in spelling) as the search term "bus" | ||
#' # and sort them by Levenshtein edit distance | ||
#' object <- gen_alternative_terms(term = "bus", similarity = TRUE) | ||
#' object <- gen_alternative_terms(term = "bus", similarity = TRUE, database = "genesis") | ||
#' | ||
#' # Find terms that are related (in spelling) to search term "bus" | ||
#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE) | ||
#' # Find terms at GENESIS that are related (in spelling) to search term "bus" | ||
#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE, database = "genesis") | ||
#' | ||
#' # Find terms at Zensus 2022 that are related (in spelling) to search term "wohn" | ||
#' object <- gen_alternative_terms(term = "wohn*", similarity = TRUE, database = "zensus") | ||
#' } | ||
#' | ||
gen_alternative_terms <- function(term = NULL, | ||
similarity = TRUE, | ||
database = c("all", "genesis", "zensus", "regio"), | ||
verbose = TRUE, | ||
...) { | ||
caller <- as.character(match.call()[1]) | ||
|
||
check_function_input( | ||
term = term, | ||
similarity = similarity, | ||
caller = caller | ||
) | ||
gen_fun <- test_database_function(database, | ||
error.input = TRUE, | ||
text = verbose) | ||
|
||
check_function_input(term = term, | ||
similarity = similarity, | ||
caller = caller, | ||
verbose = verbose) | ||
|
||
#----------------------------------------------------------------------------- | ||
|
||
res <- lapply(gen_fun, function(db){ | ||
|
||
if (verbose) { | ||
|
||
info <- paste("Started the processing of", rev_database_function(db), "database.") | ||
|
||
message(info) | ||
|
||
} | ||
|
||
par_list <- list(endpoint = "catalogue/terms", | ||
username = gen_auth_get(database = rev_database_function(db))$username, | ||
password = gen_auth_get(database = rev_database_function(db))$password, | ||
selection = term, | ||
...) | ||
|
||
#------------------------------------------------------------------------------- | ||
results_raw <- do.call(db, par_list) | ||
|
||
results_raw <- gen_api("catalogue/terms", | ||
selection = term, | ||
... | ||
) | ||
#--------------------------------------------------------------------------- | ||
|
||
results_json <- test_if_json(results_raw) | ||
results_json <- test_if_json(results_raw) | ||
|
||
if (length(results_json$List) == 0) { | ||
stop("No related terms found for your code.", call. = FALSE) | ||
} else { | ||
# similarity von Woertern berechnen und nach diesen Ordnen? | ||
termslist <- c() | ||
if (length(results_json$List) == 0 & length(gen_fun) == 1) { | ||
|
||
termslist <- lapply(results_json$List, function(x) { | ||
append(termslist, x$Content) | ||
}) | ||
stop("No related terms found for your code.", call. = FALSE) | ||
|
||
termslist <- lapply(termslist, function(x) { | ||
gsub("\\s+", " ", x) | ||
}) | ||
} else if (length(results_json$List) == 0 & length(gen_fun) > 1) { | ||
|
||
termslist <- unlist(termslist) | ||
termslist <- "No related terms found for your code." | ||
|
||
list_resp <- list("Output" = termslist) | ||
|
||
if (isTRUE(similarity)) { | ||
# generalized levenstein edit distance | ||
termslist <- termslist[order(utils::adist(term, | ||
termslist, | ||
ignore.case = TRUE | ||
))] | ||
} else { | ||
# nchar order | ||
termslist <- termslist[order(unlist(lapply(termslist, nchar)))] | ||
} | ||
|
||
list_resp <- list("Output" = termslist) | ||
# similarity von Woertern berechnen und nach diesen Ordnen? | ||
|
||
termslist <- c() | ||
|
||
termslist <- lapply(results_json$List, function(x) { | ||
|
||
append(termslist, x$Content) | ||
|
||
}) | ||
|
||
termslist <- lapply(termslist, function(x) { | ||
|
||
gsub("\\s+", " ", x) | ||
|
||
}) | ||
|
||
termslist <- unlist(termslist) | ||
|
||
#------------------------------------------------------------------------- | ||
|
||
if (isTRUE(similarity)) { | ||
|
||
# generalized Levenshtein edit distance | ||
termslist <- termslist[order(utils::adist(term, | ||
termslist, | ||
ignore.case = TRUE))] | ||
} else { | ||
|
||
# nchar order | ||
termslist <- termslist[order(unlist(lapply(termslist, nchar)))] | ||
|
||
} | ||
|
||
list_resp <- list("Output" = termslist) | ||
|
||
} | ||
|
||
attr(list_resp, "Term") <- term | ||
attr(list_resp, "Database") <- rev_database_function(db) | ||
attr(list_resp, "Language") <- results_json$Parameter$language | ||
attr(list_resp, "Pagelength") <- results_json$Parameter$pagelength | ||
attr(list_resp, "Copyright") <- results_json$Copyright | ||
|
||
return(list_resp) | ||
} | ||
|
||
}) | ||
|
||
res <- check_results(res) | ||
|
||
return(res) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,84 @@ | ||
#' Lower-Level Function to Interact With the GENESIS API | ||
#' gen_api | ||
#' | ||
#' @param endpoint Self-explanatory | ||
#' @description Low-level function to interact with the GENESIS API | ||
#' | ||
#' @param endpoint Character string. The endpoint of the API that is to be queried. | ||
#' | ||
#' @importFrom httr2 `%>%` | ||
#' | ||
#' @noRd | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' gen_api("helloworld/logincheck") %>% | ||
#' httr2::resp_body_json() | ||
#' } | ||
#' | ||
gen_api <- function(endpoint, ...) { | ||
|
||
httr2::request("https://www-genesis.destatis.de/genesisWS/rest/2020") %>% | ||
httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>% | ||
httr2::req_url_path_append(endpoint) %>% | ||
httr2::req_url_query(!!!gen_auth_get(), ...) %>% | ||
httr2::req_url_query(!!!gen_auth_get(database = "genesis"), ...) %>% | ||
httr2::req_retry(max_tries = 3) %>% | ||
httr2::req_perform() | ||
|
||
} | ||
|
||
#------------------------------------------------------------------------------- | ||
|
||
#' gen_regio_api | ||
#' | ||
#' @description Low-level function to interact with the regionalstatistik.de API | ||
#' | ||
#' @param endpoint Character string. The endpoint of the API that is to be queried. | ||
#' | ||
#' @importFrom httr2 `%>%` | ||
#' | ||
#' @noRd | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' gen_regio_api("helloworld/logincheck") %>% | ||
#' httr2::resp_body_json() | ||
#' } | ||
#' | ||
gen_regio_api <- function(endpoint, ...) { | ||
|
||
httr2::request("https://www.regionalstatistik.de/genesisws/rest/2020/") %>% | ||
httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>% | ||
httr2::req_url_path_append(endpoint) %>% | ||
httr2::req_url_query(!!!gen_auth_get(database = "regio"), ...) %>% | ||
httr2::req_retry(max_tries = 3) %>% | ||
httr2::req_perform() | ||
|
||
} | ||
|
||
#------------------------------------------------------------------------------- | ||
|
||
#' gen_zensus_api | ||
#' | ||
#' @description Low-level function to interact with the Zensus 2022 database | ||
#' | ||
#' @param endpoint Character string. The endpoint of the API that is to be queried. | ||
#' | ||
#' @importFrom httr2 `%>%` | ||
#' | ||
#' @noRd | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' gen_zensus_api("helloworld/logincheck") %>% | ||
#' httr2::resp_body_json() | ||
#' } | ||
#' | ||
gen_zensus_api <- function(endpoint, ...) { | ||
|
||
httr2::request("https://ergebnisse.zensus2022.de/api/rest/2020") %>% | ||
httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>% | ||
httr2::req_url_path_append(endpoint) %>% | ||
httr2::req_url_query(!!!gen_auth_get(database = "zensus"), ...) %>% | ||
httr2::req_retry(max_tries = 3) %>% | ||
httr2::req_perform() | ||
|
||
} |
Oops, something went wrong.