Skip to content

Commit

Permalink
Merge pull request #31 from CorrelAid/zensus
Browse files Browse the repository at this point in the history
Merge branch 'zensus' into 'dev'
  • Loading branch information
yannikbuhl authored Jul 20, 2024
2 parents a4e9f43 + c38f3fd commit 0279867
Show file tree
Hide file tree
Showing 161 changed files with 12,064 additions and 9,008 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
^docs$
^pkgdown$
^codecov\.yml$
^cran-comments\.md$
23 changes: 14 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
Package: restatis
Title: R Wrapper for the GENESIS Web Service RESTful API of the German
Title: R Wrapper to access a wide range of Germany's Federal Statistical System
databases based on the GENESIS Web Service RESTful API of the German
Federal Statistical Office (Statistisches Bundesamt/Destatis)
Version: 0.1.0
Version: 0.2.0
Authors@R: c(
person("Yannik", "Buhl", role = "aut"),
person("Yannik", "Buhl", , "[email protected]", role = c("aut", "cre")),
person("Zoran", "Kovacevic", role = "aut",
comment = c(ORCID = "0009-0002-0156-0862")),
person("Dorian", "Le Jeune", role = "aut"),
person("Long", "Nguyen", , "[email protected]", role = c("aut", "cre"),
person("Long", "Nguyen", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0001-8878-7386")),
person("Johannes", "Ritter", role = "aut")
)
Description: A RESTful API wrapper for accessing the GENESIS database of
the German Federal Statistical Office (Destatis). Also supports data
search functions, credential management, result caching, and handling
the German Federal Statistical Office (Destatis) as well as its Census
Database and the database of Germany's regional statistics. Supports data
search functions, credential management, result caching, and handling
remote background jobs for large datasets.
License: MIT + file LICENSE
URL: https://correlaid.github.io/restatis/
Expand All @@ -25,16 +27,19 @@ Imports:
memoise,
readr,
tibble,
vctrs
vctrs,
purrr
Suggests:
httptest2,
usethis,
knitr,
rmarkdown,
testthat (>= 3.0.0)
testthat (>= 3.0.0),
rvest
VignetteBuilder:
knitr
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
YEAR: 2022
YEAR: 2024
COPYRIGHT HOLDER: restatis authors
13 changes: 9 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
# Generated by roxygen2: do not edit by hand

export(gen_alternative_terms)
export(gen_auth_get)
export(gen_auth_save)
export(gen_catalogue)
export(gen_cube)
export(gen_download_job)
export(gen_find)
export(gen_list_jobs)
export(gen_logincheck)
export(gen_metadata)
export(gen_metadata_cube)
export(gen_metadata_stats)
export(gen_metadata_tab)
export(gen_metadata_val)
export(gen_metadata_var)
export(gen_metadata_statistic)
export(gen_metadata_table)
export(gen_metadata_value)
export(gen_metadata_variable)
export(gen_modified_data)
export(gen_objects2stat)
export(gen_objects2var)
export(gen_search_vars)
export(gen_signs)
export(gen_table)
export(gen_update_evas)
export(gen_val2var)
export(gen_val2var2stat)
export(gen_var2stat)
Expand Down
7 changes: 3 additions & 4 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#' List of EVAS codes
#'
#' @format ## `evas_list_long_20220724`
#' A data frame with 1,097 rows and 3 columns:
#' @format ## `evas_list`
#' A data frame with 1132 rows and 3 columns:
#' \describe{
#' \item{EVAS}{EVAS code}
#' \item{Beschreibung}{Details on the EVAS code}
#' \item{Titel}{Alternative description of EVAS code contents}
#' ...
#' }
#' @source <https://www.destatis.de/DE/Methoden/Revisionen/Glossar/EVAS.html>
"evas_list_long_20220724"
"evas_list"
121 changes: 81 additions & 40 deletions R/gen_alternative_terms.R
Original file line number Diff line number Diff line change
@@ -1,90 +1,131 @@
#' gen_alternative_terms: Call For Similar or Spelling Related Terms for Further Search
#' gen_alternative_terms
#'
#' @description Function to find search terms that are similar or related to one another and also represented in Genesis.
#' @description Function to find search terms that are similar or related to one another in spelling and also represented in the GENESIS, Zensus 2022 or regionalstatistik.de databases. Important note: The API call is searching for terms with the same characters. To be useful in searching for related terms it is highly recommended to work with "*" placeholders (see examples). The placeholder can be placed before and/or after the search term.
#'
#' @param term Character string. Maximum length of 15 characters. Term or word for which you are searching for alternative or related terms. Use of '*' as a placeholder is possible to generate broader search areas.
#' @param similarity Logical. Indicator if the output of the function should be sorted based on a Levenshtein edit distance based on the \code{adist()} function.
#' @param ... Additional parameters for the Genesis API call. These parameters are only affecting the Genesis call itself, no further processing. For more details see `vignette("additional_parameter")`.
#' @param similarity Boolean. Indicator if the output of the function should be sorted based on a Levenshtein edit distance based on the \code{adist()} function. Default is 'TRUE'.
#' @param database Character string. Indicator if the GENESIS ('genesis'), Zensus 2022 ('zensus') or regionalstatistik.de ('regio') database is called. Default option is 'all'.
#' @param verbose Boolean. Indicator if the output of the function should include detailed messages and warnings. Default option is 'TRUE'. Set the parameter to 'FALSE' to suppress additional messages and warnings.
#' @param ... Additional parameters for the API call. These parameters are only affecting the call itself, no further processing. For more details see `vignette("additional_parameter")`.
#'
#' @return A list with all recalled elements from Genesis. Attributes are added to the data.frame, describing the search configuration for the returned output.
#' @return A list with all retrieved elements from the respective database. Attributes describing the search configuration are added to the returned output.
#' @export
#'
#' @examples
#' \dontrun{
#' # Find terms that are similar (in spelling) to search term "bus"
#' # Find terms at GENESIS that match (in spelling) the search term "bus"
#' # and sort them by Levenshtein edit distance
#' object <- gen_alternative_terms(term = "bus", similarity = TRUE)
#' object <- gen_alternative_terms(term = "bus", similarity = TRUE, database = "genesis")
#'
#' # Find terms that are related (in spelling) to search term "bus"
#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE)
#' # Find terms at GENESIS that are related (in spelling) to search term "bus"
#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE, database = "genesis")
#'
#' # Find terms at Zensus 2022 that are related (in spelling) to search term "wohn"
#' object <- gen_alternative_terms(term = "wohn*", similarity = TRUE, database = "zensus")
#' }
#'
gen_alternative_terms <- function(term = NULL,
                                  similarity = TRUE,
                                  database = c("all", "genesis", "zensus", "regio"),
                                  verbose = TRUE,
                                  ...) {

  # Queries the "catalogue/terms" endpoint once per selected database and
  # collects, per database, the returned terms as list("Output" = <terms>)
  # with the search configuration attached as attributes.
  #
  # term:       search term forwarded to the API as `selection`.
  # similarity: if TRUE, terms are ordered by utils::adist() distance to
  #             `term` (case-insensitive); otherwise by number of characters.
  # database:   database selector, handed to test_database_function().
  # verbose:    if TRUE, a progress message is emitted per database.
  # ...:        forwarded unchanged to the API call.

  caller <- as.character(match.call()[1])

  # NOTE(review): presumably resolves `database` to one API function per
  # selected database -- confirm against test_database_function().
  gen_fun <- test_database_function(database,
                                    error.input = TRUE,
                                    text = verbose)

  check_function_input(term = term,
                       similarity = similarity,
                       caller = caller,
                       verbose = verbose)

  #-----------------------------------------------------------------------------

  res <- lapply(gen_fun, function(db) {

    # Resolve the database label and its credentials once per iteration
    # instead of re-computing them for every use.
    db_name <- rev_database_function(db)

    if (verbose) {

      info <- paste("Started the processing of", db_name, "database.")

      message(info)

    }

    credentials <- gen_auth_get(database = db_name)

    par_list <- list(endpoint = "catalogue/terms",
                     username = credentials$username,
                     password = credentials$password,
                     selection = term,
                     ...)

    results_raw <- do.call(db, par_list)

    #---------------------------------------------------------------------------

    results_json <- test_if_json(results_raw)

    # Scalar conditions: use the short-circuit operators rather than `&`.
    no_results <- length(results_json$List) == 0

    if (no_results && length(gen_fun) == 1) {

      # Only one database was queried: an empty result is an error.
      stop("No related terms found for your code.", call. = FALSE)

    } else if (no_results && length(gen_fun) > 1) {

      # Several databases were queried: record the miss for this one and
      # keep going so the remaining databases are still reported.
      termslist <- "No related terms found for your code."

      list_resp <- list("Output" = termslist)

    } else {

      # Pull the `Content` field of every hit and collapse runs of
      # whitespace into a single space.
      termslist <- unlist(lapply(results_json$List, function(x) {

        gsub("\\s+", " ", x$Content)

      }))

      #-------------------------------------------------------------------------

      if (isTRUE(similarity)) {

        # generalized Levenshtein edit distance
        termslist <- termslist[order(utils::adist(term,
                                                  termslist,
                                                  ignore.case = TRUE))]

      } else {

        # nchar order
        termslist <- termslist[order(nchar(termslist))]

      }

      list_resp <- list("Output" = termslist)

    }

    attr(list_resp, "Term") <- term
    attr(list_resp, "Database") <- db_name
    attr(list_resp, "Language") <- results_json$Parameter$language
    attr(list_resp, "Pagelength") <- results_json$Parameter$pagelength
    attr(list_resp, "Copyright") <- results_json$Copyright

    return(list_resp)

  })

  # NOTE(review): check_results() is defined elsewhere; presumably it
  # post-processes the per-database list -- confirm.
  res <- check_results(res)

  return(res)

}
71 changes: 68 additions & 3 deletions R/gen_api.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,84 @@
#' Low-level function to interact with the Destatis GENESIS API
#' gen_api
#'
#' @param endpoint Self-explanatory
#' @description Low-level function to interact with the GENESIS API
#'
#' @param endpoint Character string. The endpoint of the API that is to be queried.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_api("helloworld/logincheck") %>%
#' httr2::resp_body_json()
#' }
#'
gen_api <- function(endpoint, ...) {

  # Builds and performs a request against the GENESIS REST base URL:
  # `endpoint` is appended to the path, the stored "genesis" credentials and
  # any further arguments in `...` become query parameters, and the request
  # is configured with `max_tries = 3` before being performed.
  #
  # The query is set exactly once, using the database-specific credentials;
  # the earlier database-less `gen_auth_get()` query step was redundant.
  httr2::request("https://www-genesis.destatis.de/genesisWS/rest/2020") %>%
    httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>%
    httr2::req_url_path_append(endpoint) %>%
    httr2::req_url_query(!!!gen_auth_get(database = "genesis"), ...) %>%
    httr2::req_retry(max_tries = 3) %>%
    httr2::req_perform()

}

#-------------------------------------------------------------------------------

#' gen_regio_api
#'
#' @description Low-level helper that sends a single request to the
#' regionalstatistik.de REST API and returns whatever
#' `httr2::req_perform()` yields.
#'
#' @param endpoint Character string. Path of the API endpoint, appended to the
#' base URL.
#' @param ... Further query parameters, passed on to `httr2::req_url_query()`
#' together with the stored credentials for the 'regio' database.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_regio_api("helloworld/logincheck") %>%
#' httr2::resp_body_json()
#' }
#'
gen_regio_api <- function(endpoint, ...) {

  # Assemble the request step by step
  req <- httr2::request("https://www.regionalstatistik.de/genesisws/rest/2020/")
  req <- httr2::req_user_agent(req, "https://github.com/CorrelAid/restatis")
  req <- httr2::req_url_path_append(req, endpoint)

  # Credentials for the "regio" database plus caller-supplied parameters
  req <- httr2::req_url_query(req, !!!gen_auth_get(database = "regio"), ...)

  req <- httr2::req_retry(req, max_tries = 3)

  httr2::req_perform(req)

}

#-------------------------------------------------------------------------------

#' gen_zensus_api
#'
#' @description Low-level helper that sends a single request to the
#' Zensus 2022 REST API and returns whatever `httr2::req_perform()` yields.
#'
#' @param endpoint Character string. Path of the API endpoint, appended to the
#' base URL.
#' @param ... Further query parameters, passed on to `httr2::req_url_query()`
#' together with the stored credentials for the 'zensus' database.
#'
#' @importFrom httr2 `%>%`
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' gen_zensus_api("helloworld/logincheck") %>%
#' httr2::resp_body_json()
#' }
#'
gen_zensus_api <- function(endpoint, ...) {

  # Assemble the request step by step
  req <- httr2::request("https://ergebnisse.zensus2022.de/api/rest/2020")
  req <- httr2::req_user_agent(req, "https://github.com/CorrelAid/restatis")
  req <- httr2::req_url_path_append(req, endpoint)

  # Credentials for the "zensus" database plus caller-supplied parameters
  req <- httr2::req_url_query(req, !!!gen_auth_get(database = "zensus"), ...)

  req <- httr2::req_retry(req, max_tries = 3)

  httr2::req_perform(req)

}
Loading

0 comments on commit 0279867

Please sign in to comment.