Skip to content

Commit

Permalink
Merge pull request #32 from CorrelAid/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
yannikbuhl authored Jul 23, 2024
2 parents 4ec7d47 + 97ff9c9 commit 3b90df7
Show file tree
Hide file tree
Showing 161 changed files with 13,522 additions and 8,623 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,7 @@
^pkgdown$
^codecov\.yml$
^cran-comments\.md$
^CRAN-SUBMISSION$
32 changes: 17 additions & 15 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
Package: restatis
Title: Web API Client for the German Federal Statistical Office Database
Version: 0.1.0
Title: R Wrapper to Access a Wide Range of Germany's Federal Statistical System
Databases Based on the GENESIS Web Service RESTful API of the German Federal
Statistical Office (Statistisches Bundesamt/Destatis)
Version: 0.2.0
Authors@R: c(
person("Yannik", "Buhl", , "yannik.buhl@posteo.de", role = "aut"),
person("Zoran", "Kovacevic", , "[email protected]", role = "aut",
person("Yannik", "Buhl", , "ybuhl@posteo.de", role = c("aut", "cre")),
person("Zoran", "Kovacevic", role = "aut",
comment = c(ORCID = "0009-0002-0156-0862")),
person("Dorian", "Le Jeune", , "[email protected]", role = "aut"),
person("Long", "Nguyen", , "[email protected]", role = c("aut", "cre"),
person("Dorian", "Le Jeune", role = "aut"),
person("Long", "Nguyen", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0001-8878-7386")),
person("Johannes", "Ritter", , "[email protected]", role = "aut")
)
Description: A 'RESTful' API wrapper for accessing the 'GENESIS' database
of the German Federal Statistical Office (Destatis)
<https://www-genesis.destatis.de/>. Also supports data search
functions, credential management, result caching, and handling remote
background jobs for large datasets.
Description: A RESTful API wrapper for accessing the GENESIS database of
the German Federal Statistical Office (Destatis) as well as its Census
Database and the database of Germany's regional statistics. Supports data
search functions, credential management, result caching, and handling
remote background jobs for large datasets.
License: MIT + file LICENSE
URL: https://correlaid.github.io/restatis/,
https://github.com/CorrelAid/restatis
Expand All @@ -28,14 +30,14 @@ Imports:
readr,
stats,
tibble,
tools,
utils,
vctrs
vctrs,
purrr
Suggests:
httptest2,
knitr,
rmarkdown,
testthat (>= 3.0.0),
rvest,
usethis,
withr
VignetteBuilder:
Expand All @@ -44,4 +46,4 @@ Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
YEAR: 2022
YEAR: 2024
COPYRIGHT HOLDER: restatis authors
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
# Generated by roxygen2: do not edit by hand

export(gen_alternative_terms)
export(gen_auth_get)
export(gen_auth_save)
export(gen_catalogue)
export(gen_cube)
export(gen_download_job)
export(gen_find)
export(gen_list_jobs)
export(gen_logincheck)
export(gen_metadata)
export(gen_metadata_cube)
export(gen_metadata_statistic)
export(gen_metadata_stats)
export(gen_metadata_tab)
export(gen_metadata_table)
export(gen_metadata_val)
export(gen_metadata_value)
export(gen_metadata_var)
export(gen_metadata_variable)
export(gen_modified_data)
export(gen_objects2stat)
export(gen_objects2var)
export(gen_search_vars)
export(gen_signs)
export(gen_table)
export(gen_update_evas)
export(gen_val2var)
export(gen_val2var2stat)
export(gen_var2stat)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# restatis 0.2.0

* Added support for regionalstatistik.de and the Zensus 2022 database
* Massively improved the handling of 'jobs' (e.g., adding gen_download_job())
* Added some helper functions such as gen_logincheck()
* Rewrote functions in the background

# restatis 0.1.0

* Initial CRAN submission.
9 changes: 4 additions & 5 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#' List of EVAS Codes
#'
#' @format ## `evas_list_long_20220724`
#' A data frame with 1,097 rows and 3 columns:
#' @format ## `evas_list`
#' A data frame with 1132 rows and 3 columns:
#' \describe{
#' \item{EVAS}{EVAS code}
#' \item{Beschreibung}{Details on the EVAS code}
#' \item{Titel}{Alternative description of EVAS code contents}
#' ...
#' \item{Titel}{Alternative description of EVAS code contents}
#' }
#' @source <https://www.destatis.de/DE/Methoden/Revisionen/Glossar/EVAS.html>
"evas_list_long_20220724"
"evas_list"
138 changes: 95 additions & 43 deletions R/gen_alternative_terms.R
Original file line number Diff line number Diff line change
@@ -1,78 +1,130 @@
#' Search for Related Terms Based on the Same Specific String
#' gen_alternative_terms
#'
#' @description Function to find terms in GENESIS that are similar or related based on a simple comparison of strings. This can help to identify alternative search terms.
#' @description Function to find search terms that are similar or related to one another in spelling and also represented in the GENESIS, Zensus 2022 or regionalstatistik.de databases. Important note: The API call is searching for terms with the same characters. To be useful in searching for related terms it is highly recommended to work with "*" placeholders (see examples). The placeholder can be placed before and/or after the search term.
#'
#' @param term Character string. Maximum length of 15 characters. Term or word for which you are searching for alternative or related terms. Use of '*' as a placeholder is possible to generate broader search areas.
#' @param similarity Logical. Indicator if the output of the function should be sorted based on a Levenshtein edit distance based on the \code{adist()} function.
#' @param ... Additional parameter of the GENESIS API call. These parameters are only affecting the GENESIS API call itself, no further processing.
#' @param similarity Boolean. Indicator if the output of the function should be sorted based on a Levenshtein edit distance based on the \code{adist()} function. Default is 'TRUE'.
#' @param database Character string. Indicator if the GENESIS ('genesis'), Zensus 2022 ('zensus') or regionalstatistik.de ('regio') database is called. Default option is 'all'.
#' @param verbose Boolean. Indicator if the output of the function should include detailed messages and warnings. Default option is 'TRUE'. Set the parameter to 'FALSE' to suppress additional messages and warnings.
#' @param ... Additional parameters for the API call. These parameters are only affecting the call itself, no further processing. For more details see `vignette("additional_parameter")`.
#'
#' @return A list with all recalled elements from GENESIS. Attributes are added to the data.frame, describing the search configuration for the returned output.
#' @return A list with all elements retrieved from the respective database. Attributes are added to the list, describing the search configuration for the returned output.
#' @export
#'
#' @examples
#' \dontrun{
#' # Find terms that are similar (in spelling) to search term "bus"
#' # Find terms at GENESIS that are the same (in spelling) as the search term "bus"
#' # and sort them by Levenshtein edit distance
#' object <- gen_alternative_terms(term = "bus", similarity = TRUE)
#' object <- gen_alternative_terms(term = "bus", similarity = TRUE, database = "genesis")
#'
#' # Find terms that are related (in spelling) to search term "bus"
#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE)
#' # Find terms at GENESIS that are related (in spelling) to search term "bus"
#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE, database = "genesis")
#'
#' # Find terms at Zensus 2022 that are related (in spelling) to search term "wohn"
#' object <- gen_alternative_terms(term = "wohn*", similarity = TRUE, database = "zensus")
#' }
#'
gen_alternative_terms <- function(term = NULL,
                                  similarity = TRUE,
                                  database = c("all", "genesis", "zensus", "regio"),
                                  verbose = TRUE,
                                  ...) {

  # Name of this function as called; forwarded to the input checker.
  caller <- as.character(match.call()[1])

  # Resolve `database` into the low-level API function(s) to call.
  gen_fun <- test_database_function(database,
                                    error.input = TRUE,
                                    text = verbose)

  # Validate the inputs exactly once, honouring `verbose`.
  check_function_input(term = term,
                       similarity = similarity,
                       caller = caller,
                       verbose = verbose)

  #-----------------------------------------------------------------------------

  res <- lapply(gen_fun, function(db) {

    # Database name belonging to the API function; looked up once and reused
    # for the status message, the credentials and the "Database" attribute.
    db_name <- rev_database_function(db)

    if (verbose) {

      message(paste("Started the processing of", db_name, "database."))

    }

    # Fetch the stored credentials once instead of once per field.
    credentials <- gen_auth_get(database = db_name)

    # `...` is resolved lexically from the enclosing function's arguments.
    par_list <- list(endpoint = "catalogue/terms",
                     username = credentials$username,
                     password = credentials$password,
                     selection = term,
                     ...)

    # Query the selected database only. The result must not be replaced by a
    # GENESIS-only request, otherwise `database` would have no effect.
    results_raw <- do.call(db, par_list)

    #---------------------------------------------------------------------------

    results_json <- test_if_json(results_raw)

    no_results <- length(results_json$List) == 0

    if (no_results && length(gen_fun) == 1) {

      # Only one database was requested and it returned nothing: hard error.
      stop("No related terms found for your code.", call. = FALSE)

    } else if (no_results && length(gen_fun) > 1) {

      # Several databases are queried: record the miss for this one and
      # continue with the remaining databases instead of aborting.
      list_resp <- list("Output" = "No related terms found for your code.")

    } else {

      # Collect the term of every list entry and collapse whitespace runs.
      termslist <- lapply(results_json$List, function(x) {

        gsub("\\s+", " ", x$Content)

      })

      termslist <- unlist(termslist)

      #-------------------------------------------------------------------------

      if (isTRUE(similarity)) {

        # Order by generalized Levenshtein edit distance to the search term.
        termslist <- termslist[order(utils::adist(term,
                                                  termslist,
                                                  ignore.case = TRUE))]

      } else {

        # Order by number of characters.
        termslist <- termslist[order(nchar(termslist))]

      }

      list_resp <- list("Output" = termslist)

    }

    # Describe the search configuration on the returned object.
    attr(list_resp, "Term") <- term
    attr(list_resp, "Database") <- db_name
    attr(list_resp, "Language") <- results_json$Parameter$language
    attr(list_resp, "Pagelength") <- results_json$Parameter$pagelength
    attr(list_resp, "Copyright") <- results_json$Copyright

    return(list_resp)

  })

  res <- check_results(res)

  return(res)

}
71 changes: 68 additions & 3 deletions R/gen_api.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,84 @@
#' Lower-Level Function to Interact With the GENESIS API
#' gen_api
#'
#' @param endpoint Self-explanatory
#' @description Low-level function to interact with the GENESIS API
#'
#' @param endpoint Character string. The endpoint of the API that is to be queried.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_api("helloworld/logincheck") %>%
#' httr2::resp_body_json()
#' }
#'
gen_api <- function(endpoint, ...) {

  # Build the request step by step against the GENESIS REST base URL.
  req <- httr2::request("https://www-genesis.destatis.de/genesisWS/rest/2020")
  req <- httr2::req_user_agent(req, "https://github.com/CorrelAid/restatis")
  req <- httr2::req_url_path_append(req, endpoint)

  # Add the GENESIS credentials and the caller's extra parameters exactly
  # once. A second req_url_query() call with an unqualified gen_auth_get()
  # would splice both the credentials and `...` into the query twice.
  req <- httr2::req_url_query(req, !!!gen_auth_get(database = "genesis"), ...)

  # Allow up to three attempts before giving up.
  req <- httr2::req_retry(req, max_tries = 3)

  # Perform the request and return the httr2 response.
  httr2::req_perform(req)

}

#-------------------------------------------------------------------------------

#' gen_regio_api
#'
#' @description Low-level function to interact with the regionalstatistik.de API
#'
#' @param endpoint Character string. The endpoint of the API that is to be queried.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_regio_api("helloworld/logincheck") %>%
#' httr2::resp_body_json()
#' }
#'
gen_regio_api <- function(endpoint, ...) {

  # Base request against the regionalstatistik.de REST service.
  req <- httr2::request("https://www.regionalstatistik.de/genesisws/rest/2020/")
  req <- httr2::req_user_agent(req, "https://github.com/CorrelAid/restatis")

  # Target the requested endpoint; add credentials and extra query parameters.
  req <- httr2::req_url_path_append(req, endpoint)
  req <- httr2::req_url_query(req, !!!gen_auth_get(database = "regio"), ...)

  # Allow up to three attempts, then perform the request.
  req <- httr2::req_retry(req, max_tries = 3)

  httr2::req_perform(req)

}

#-------------------------------------------------------------------------------

#' gen_zensus_api
#'
#' @description Low-level function to interact with the Zensus 2022 database
#'
#' @param endpoint Character string. The endpoint of the API that is to be queried.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_zensus_api("helloworld/logincheck") %>%
#' httr2::resp_body_json()
#' }
#'
gen_zensus_api <- function(endpoint, ...) {

  # Base request against the Zensus 2022 REST service.
  req <- httr2::request("https://ergebnisse.zensus2022.de/api/rest/2020")
  req <- httr2::req_user_agent(req, "https://github.com/CorrelAid/restatis")

  # Target the requested endpoint; add credentials and extra query parameters.
  req <- httr2::req_url_path_append(req, endpoint)
  req <- httr2::req_url_query(req, !!!gen_auth_get(database = "zensus"), ...)

  # Allow up to three attempts, then perform the request.
  req <- httr2::req_retry(req, max_tries = 3)

  httr2::req_perform(req)

}
Loading

0 comments on commit 3b90df7

Please sign in to comment.