Merge pull request #31 from CorrelAid/zensus

Merge branch 'zensus' into 'dev'
CorrelAid · Jul 20, 2024 · 0279867 · 0279867
2 parents a4e9f43 + c38f3fd
commit 0279867
Show file tree

Hide file tree

Showing 161 changed files with 12,064 additions and 9,008 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -9,3 +9,4 @@
 ^docs$
 ^pkgdown$
 ^codecov\.yml$
+^cran-comments\.md$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,19 +1,21 @@
 Package: restatis
-Title: R Wrapper for the GENESIS Web Service RESTful API of the German
+Title: R Wrapper to access a wide range of Germany's Federal Statistical System 
+    databases based on the GENESIS Web Service RESTful API of the German
     Federal Statistical Office (Statistisches Bundesamt/Destatis)
-Version: 0.1.0
+Version: 0.2.0
 Authors@R: c(
-    person("Yannik", "Buhl", role = "aut"),
+    person("Yannik", "Buhl", , "[email protected]", role = c("aut", "cre")),
     person("Zoran", "Kovacevic", role = "aut",
            comment = c(ORCID = "0009-0002-0156-0862")),
     person("Dorian", "Le Jeune", role = "aut"),
-    person("Long", "Nguyen", , "[email protected]", role = c("aut", "cre"),
+    person("Long", "Nguyen", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0001-8878-7386")),
     person("Johannes", "Ritter", role = "aut")
   )
 Description: A RESTful API wrapper for accessing the GENESIS database of
-    the German Federal Statistical Office (Destatis). Also supports data
-    search functions, credential management, result caching, and handling
+    the German Federal Statistical Office (Destatis) as well as its Census 
+    Database and the database of Germany's regional statistics. Supports data 
+    search functions, credential management, result caching, and handling 
     remote background jobs for large datasets.
 License: MIT + file LICENSE
 URL: https://correlaid.github.io/restatis/
@@ -25,16 +27,19 @@ Imports:
     memoise,
     readr,
     tibble,
-    vctrs
+    vctrs,
+    purrr
 Suggests: 
     httptest2,
+    usethis,
     knitr,
     rmarkdown,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    rvest
 VignetteBuilder: 
     knitr
 Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
diff --git a/LICENSE b/LICENSE
@@ -1,2 +1,2 @@
-YEAR: 2022
+YEAR: 2024
 COPYRIGHT HOLDER: restatis authors
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,22 +1,27 @@
 # Generated by roxygen2: do not edit by hand
 
 export(gen_alternative_terms)
+export(gen_auth_get)
 export(gen_auth_save)
 export(gen_catalogue)
 export(gen_cube)
+export(gen_download_job)
 export(gen_find)
 export(gen_list_jobs)
+export(gen_logincheck)
 export(gen_metadata)
 export(gen_metadata_cube)
-export(gen_metadata_stats)
-export(gen_metadata_tab)
-export(gen_metadata_val)
-export(gen_metadata_var)
+export(gen_metadata_statistic)
+export(gen_metadata_table)
+export(gen_metadata_value)
+export(gen_metadata_variable)
 export(gen_modified_data)
 export(gen_objects2stat)
 export(gen_objects2var)
 export(gen_search_vars)
+export(gen_signs)
 export(gen_table)
+export(gen_update_evas)
 export(gen_val2var)
 export(gen_val2var2stat)
 export(gen_var2stat)

diff --git a/R/data.R b/R/data.R
@@ -1,12 +1,11 @@
 #' List of EVAS codes
 #'
-#' @format ## `evas_list_long_20220724`
-#' A data frame with 1,097 rows and 3 columns:
+#' @format ## `evas_list`
+#' A data frame with 1132 rows and 3 columns:
 #' \describe{
 #'   \item{EVAS}{EVAS code}
 #'   \item{Beschreibung}{Details on the EVAS code}
 #'   \item{Titel}{Alternative description of EVAS code contents}
-#'   ...
 #' }
 #' @source <https://www.destatis.de/DE/Methoden/Revisionen/Glossar/EVAS.html>
-"evas_list_long_20220724"
+"evas_list"
diff --git a/R/gen_alternative_terms.R b/R/gen_alternative_terms.R
@@ -1,90 +1,131 @@
-#' gen_alternative_terms: Call For Similiar or Spelling Related Terms for Further Search
+#' gen_alternative_terms
 #'
-#' @description Function to find search terms that are similar or related to one another and also represented in Genesis.
+#' @description Function to find search terms that are similar or related to one another in spelling and also represented in the GENESIS, Zensus 2022 or regionalstatistik.de databases. Important note: The API call only searches for terms consisting of the same characters as the search term. To find related terms, it is therefore highly recommended to use "*" placeholders (see examples). The placeholder can be placed before and/or after the search term.
 #'
 #' @param term Character string. Maximum length of 15 characters. Term or word for which you are searching for alternative or related terms. Use of '*' as a placeholder is possible to generate broader search areas.
-#' @param similarity Logical. Indicator if the output of the function should be sorted based on a Levenshtein edit distance based on the \code{adist()} function.
-#' @param ... Additional parameters for the Genesis API call. These parameters are only affecting the Genesis call itself, no further processing. For more details see `vignette("additional_parameter")`.
+#' @param similarity Boolean. Indicator if the output of the function should be sorted by the Levenshtein edit distance to the search term, computed with the \code{adist()} function. Default is 'TRUE'.
+#' @param database Character string. Indicator if the GENESIS ('genesis'), Zensus 2022 ('zensus') or regionalstatistik.de ('regio') database is called. Default option is 'all'.
+#' @param verbose Boolean. Indicator if the output of the function should include detailed messages and warnings. Default option is 'TRUE'. Set the parameter to 'FALSE' to suppress additional messages and warnings.
+#' @param ... Additional parameters for the API call. These parameters are only affecting the call itself, no further processing. For more details see `vignette("additional_parameter")`.
 #'
-#' @return A list with all recalled elements from Genesis. Attributes are added to the data.frame, describing the search configuration for the returned output.
+#' @return A list with all retrieved elements from the respective database. Attributes are added to the list, describing the search configuration for the returned output.
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' # Find terms that are similar (in spelling) to search term "bus"
+#' # Find terms at GENESIS that are the same (in spelling) as the search term "bus"
 #' # and sort them by Levenshtein edit distance
-#' object <- gen_alternative_terms(term = "bus", similarity = TRUE)
+#' object <- gen_alternative_terms(term = "bus", similarity = TRUE, database = "genesis")
 #'
-#' # Find terms that are related (in spelling) to search term "bus"
-#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE)
+#' # Find terms at GENESIS that are related (in spelling) to search term "bus"
+#' object <- gen_alternative_terms(term = "bus*", similarity = TRUE, database = "genesis")
+#'
+#' # Find terms at Zensus 2022 that are related (in spelling) to search term "wohn"
+#' object <- gen_alternative_terms(term = "wohn*", similarity = TRUE, database = "zensus")
 #' }
 #'
 gen_alternative_terms <- function(term = NULL,
                                   similarity = TRUE,
+                                  database = c("all", "genesis", "zensus", "regio"),
+                                  verbose = TRUE,
                                   ...) {
 
   caller <- as.character(match.call()[1])
 
+  gen_fun <- test_database_function(database,
+                                    error.input = TRUE,
+                                    text = verbose)
+
   check_function_input(term = term,
                        similarity = similarity,
-                       caller = caller)
-
-#-------------------------------------------------------------------------------
+                       caller = caller,
+                       verbose = verbose)
 
-  results_raw <- gen_api("catalogue/terms",
+  #-----------------------------------------------------------------------------
 
-    username = gen_auth_get()$username,
-    password = gen_auth_get()$password,
-    selection = term,
-    ...)
+  res <- lapply(gen_fun, function(db){
 
-  results_json <- test_if_json(results_raw)
+    if (verbose) {
 
-  if (length(results_json$List) == 0) {
+      info <- paste("Started the processing of", rev_database_function(db), "database.")
 
-    stop("No related terms found for your code.", call. = FALSE)
+      message(info)
 
-  } else {
+    }
 
-    # similarity von Woertern berechnen und nach diesen Ordnen?
-    termslist <- c()
+    par_list <-  list(endpoint = "catalogue/terms",
+                      username = gen_auth_get(database = rev_database_function(db))$username,
+                      password = gen_auth_get(database = rev_database_function(db))$password,
+                      selection = term,
+                      ...)
 
-    termslist <- lapply(results_json$List, function(x) {
+    results_raw <- do.call(db, par_list)
 
-      append(termslist, x$Content)
+    #---------------------------------------------------------------------------
 
-    })
+    results_json <- test_if_json(results_raw)
 
-    termslist <- lapply(termslist, function(x) {
+    if (length(results_json$List) == 0  & length(gen_fun) == 1) {
 
-      gsub("\\s+", " ", x)
+      stop("No related terms found for your code.", call. = FALSE)
 
-    })
+    } else if (length(results_json$List) == 0  & length(gen_fun) > 1) {
 
-    termslist <- unlist(termslist)
+      termslist <- "No related terms found for your code."
 
-    if (isTRUE(similarity)) {
+      list_resp <- list("Output" = termslist)
 
-      # generalized levenstein edit distance
-      termslist <- termslist[order(utils::adist(term,
-                                                termslist,
-                                                ignore.case = TRUE))]
     } else {
 
-      # nchar order
-      termslist <- termslist[order(unlist(lapply(termslist, nchar)))]
+      # Compute the similarity of the words and order by it?
 
-    }
+      termslist <- c()
+
+      termslist <- lapply(results_json$List, function(x) {
+
+        append(termslist, x$Content)
+
+      })
+
+      termslist <- lapply(termslist, function(x) {
+
+        gsub("\\s+", " ", x)
+
+      })
 
-    list_resp <- list("Output" = termslist)
+      termslist <- unlist(termslist)
+
+      #-------------------------------------------------------------------------
+
+      if (isTRUE(similarity)) {
+
+        # generalized Levenshtein edit distance
+        termslist <- termslist[order(utils::adist(term,
+                                                  termslist,
+                                                  ignore.case = TRUE))]
+      } else {
+
+        # nchar order
+        termslist <- termslist[order(unlist(lapply(termslist, nchar)))]
+
+      }
+
+      list_resp <- list("Output" = termslist)
+
+    }
 
     attr(list_resp, "Term") <- term
+    attr(list_resp, "Database") <- rev_database_function(db)
     attr(list_resp, "Language") <- results_json$Parameter$language
     attr(list_resp, "Pagelength") <- results_json$Parameter$pagelength
     attr(list_resp, "Copyright") <- results_json$Copyright
 
     return(list_resp)
 
-  }
+  })
+
+  res <- check_results(res)
+
+  return(res)
 
 }
diff --git a/R/gen_api.R b/R/gen_api.R
@@ -1,19 +1,84 @@
-#' Low-level function to interact with the Destatis GENESIS API
+#' gen_api
 #'
-#' @param endpoint Self-explanatory
+#' @description Low-level function to interact with the GENESIS API
+#'
+#' @param endpoint Character string. The endpoint of the API that is to be queried.
 #'
 #' @importFrom httr2 `%>%`
 #'
 #' @noRd
 #'
 #' @examples
+#' \dontrun{
 #' gen_api("helloworld/logincheck") %>%
 #'   httr2::resp_body_json()
+#' }
+#'
 gen_api <- function(endpoint, ...) {
+
   httr2::request("https://www-genesis.destatis.de/genesisWS/rest/2020") %>%
     httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>%
     httr2::req_url_path_append(endpoint) %>%
-    httr2::req_url_query(!!!gen_auth_get(), ...) %>%
+    httr2::req_url_query(!!!gen_auth_get(database = "genesis"), ...) %>%
+    httr2::req_retry(max_tries = 3) %>%
+    httr2::req_perform()
+
+}
+
+#-------------------------------------------------------------------------------
+
+#' gen_regio_api
+#'
+#' @description Low-level function to interact with the regionalstatistik.de API
+#'
+#' @param endpoint Character string. The endpoint of the API that is to be queried.
+#'
+#' @importFrom httr2 `%>%`
+#'
+#' @noRd
+#'
+#' @examples
+#' \dontrun{
+#' gen_regio_api("helloworld/logincheck") %>%
+#'   httr2::resp_body_json()
+#' }
+#'
+gen_regio_api <- function(endpoint, ...) {
+
+  httr2::request("https://www.regionalstatistik.de/genesisws/rest/2020/") %>%
+    httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>%
+    httr2::req_url_path_append(endpoint) %>%
+    httr2::req_url_query(!!!gen_auth_get(database = "regio"), ...) %>%
+    httr2::req_retry(max_tries = 3) %>%
+    httr2::req_perform()
+
+}
+
+#-------------------------------------------------------------------------------
+
+#' gen_zensus_api
+#'
+#' @description Low-level function to interact with the Zensus 2022 database
+#'
+#' @param endpoint Character string. The endpoint of the API that is to be queried.
+#'
+#' @importFrom httr2 `%>%`
+#'
+#' @noRd
+#'
+#' @examples
+#' \dontrun{
+#' gen_zensus_api("helloworld/logincheck") %>%
+#'   httr2::resp_body_json()
+#' }
+#'
+gen_zensus_api <- function(endpoint, ...) {
+
+  httr2::request("https://ergebnisse.zensus2022.de/api/rest/2020") %>%
+    httr2::req_user_agent("https://github.com/CorrelAid/restatis") %>%
+    httr2::req_url_path_append(endpoint) %>%
+    httr2::req_url_query(!!!gen_auth_get(database = "zensus"), ...) %>%
     httr2::req_retry(max_tries = 3) %>%
     httr2::req_perform()
+
 }