Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Begin implementing integration with geodk #47

Merged
merged 28 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9ec8daa
Add some attempt at tidying data
aleksanderbl29 Feb 13, 2025
874ca61
Remove unused code
aleksanderbl29 Feb 13, 2025
afb1f2f
Begin integration of geodk and dkstat packages
aleksanderbl29 Feb 13, 2025
dba340d
Update renv snapshot
aleksanderbl29 Feb 13, 2025
169e7c2
Add area things as vectors instead of datasets
aleksanderbl29 Feb 13, 2025
cd6823d
Specify some things
aleksanderbl29 Feb 13, 2025
2ffe94f
Add updated spec for geodk integration
aleksanderbl29 Feb 25, 2025
749dd5b
Implement all constructors for the 14 map levels provided by the api
aleksanderbl29 Feb 25, 2025
73da9f6
Import na.omit
aleksanderbl29 Feb 25, 2025
784a751
Remove unused vector that was commented out anyway
aleksanderbl29 Feb 25, 2025
6519cc1
Add cyclocomp
aleksanderbl29 Feb 25, 2025
9327c5a
Add geodk install
aleksanderbl29 Feb 25, 2025
8d9e7a6
Move custom r commands to a step after the r installation itself
aleksanderbl29 Feb 25, 2025
3f6adb1
Specify r universe repo in install command
aleksanderbl29 Feb 25, 2025
13b6a5a
Install from github instead
aleksanderbl29 Feb 25, 2025
7cdf2cd
Use the right function
aleksanderbl29 Feb 25, 2025
52f854f
Add test for each constructor class
aleksanderbl29 Feb 25, 2025
55fd6e0
Rename function
aleksanderbl29 Feb 25, 2025
a871bda
Remove lint and loosen linting settings a bit
aleksanderbl29 Feb 25, 2025
0974c73
Install with remotes package
aleksanderbl29 Feb 25, 2025
6a58e4c
Install package from runiverse
aleksanderbl29 Feb 25, 2025
63b933f
Add tidyverse
aleksanderbl29 Feb 25, 2025
7f9cb0e
Try to fix cmd check
aleksanderbl29 Feb 25, 2025
600067c
Don't eval
aleksanderbl29 Feb 25, 2025
abf1f54
More things
aleksanderbl29 Feb 25, 2025
3ac8762
Declare import
aleksanderbl29 Feb 25, 2025
b6391e8
Increment versions and date
aleksanderbl29 Feb 25, 2025
0f38c5a
Remove unused utils file
aleksanderbl29 Feb 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
^docs$
^README.Rmd$
^.lintr$
^doc$
^Meta$
13 changes: 12 additions & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: main
branches: [ main ]
pull_request:
workflow_dispatch:
schedule:
Expand Down Expand Up @@ -37,22 +37,33 @@ jobs:
steps:
- uses: actions/checkout@v4


- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true
extra-repositories: |
https://ropengov.r-universe.dev

- uses: r-lib/actions/setup-r-dependencies@v2
with:
cache-version: 3
extra-packages: |
any::rcmdcheck
any::geodk
any::tidyverse
needs: |
check

# - name: Install package from r-universe
# run: |
# install.packages("remotes")
# remotes::install_github("rOpenGov/geodk")
# shell: Rscript {0}

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
- uses: r-lib/actions/setup-r-dependencies@v2
with:
cache-version: 2
extra-packages: any::lintr, local::.
extra-packages: any::lintr, any::cyclocomp, local::.
needs: lint

- name: Lint
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ read-and-delete-me
test_script.R
.DS_Store
docs
inst/doc
/doc/
/Meta/
8 changes: 6 additions & 2 deletions .lintr
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
linters: linters_with_defaults(
cyclocomp_linter(complexity_limit = 17L))
cyclocomp_linter(complexity_limit = 17L),
object_usage_linter = NULL,
return_linter = NULL)
encoding: "UTF-8"
exclusions: list(
"tests")
"tests",
"vignettes",
"R/s3_constructors.R")
12 changes: 8 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Type: Package
Package: dkstat
Title: API connection to the StatBank from Statistics Denmark
Version: 0.0.0.9001
Date: 2024-12-10
Version: 0.0.0.9002
Date: 2025-02-25
Authors@R: c(
person("Aleksander", "Bang-Larsen", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0009-0007-7984-4650")),
Expand All @@ -22,7 +22,10 @@ Imports:
lubridate,
stringr
Suggests:
testthat
knitr,
rmarkdown,
testthat,
dplyr
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 3
Expand All @@ -31,5 +34,6 @@ RoxygenNote: 7.3.2
X-schema.org-isPartOf: http://ropengov.org/
X-schema.org-keywords: ropengov
Depends:
R (>= 2.10)
R (>= 4.1)
LazyData: true
VignetteBuilder: knitr
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ export(dst_get_data)
export(dst_get_tables)
export(dst_meta)
export(dst_search)
importFrom(stats,na.omit)
importFrom(utils,read.csv)
importFrom(utils,read.csv2)
36 changes: 36 additions & 0 deletions R/determine_geo_properties.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# determine_geo_properties.R
# This file contains the logic to decide what kind of geographic information is
# included in a given dataset. The function is mainly used in
# `dst_get_data()` - Thus affecting `dst_get_all_data()`

# Attach a geographic S3 class to a downloaded table when its metadata
# carries map information.
#
# Arguments:
#   table  Table id, passed on to `dst_meta()`.
#   df     The data retrieved for `table`.
#
# Returns `df` passed through the constructor picked by `choose_geo_class()`
# when `is_geographic()` accepts the metadata, otherwise `df` unchanged.
determine_geo_properties <- function(table, df) {
  # Request the map metadata rather than the regular parsed metadata.
  meta <- dst_meta(table, lang = "da", geo = TRUE)

  # Nothing to add for tables without geographic metadata.
  if (!is_geographic(meta)) {
    return(df)
  }

  chosen_constructor <- choose_geo_class(meta)
  chosen_constructor(df)
}

# Report whether geographic metadata is available.
# Any non-NULL `meta` counts as geographic; `NULL` does not.
is_geographic <- function(meta) {
  !is.null(meta)
}

# Choose a class constructor
#
# Arguments:
#   meta  Geographic metadata; the map level is read from
#         `meta$variables$map`.
#
# Returns the function named `new_dkstat_<map>` for the first non-missing map
# level. When there is no map level, or no constructor function exists for
# it, `identity` is returned so the data passes through without a geographic
# class instead of the whole download failing.
choose_geo_class <- function(meta) {
  # Get the map variable, dropping missing entries
  meta_class <- as.character(meta$variables$map)
  meta_class <- meta_class[!is.na(meta_class)]

  if (length(meta_class) == 0L) {
    return(identity)
  }

  # Several variables can carry a map level, but an object lookup needs
  # exactly one name, so the first level decides the class.
  map_level <- meta_class[[1L]]
  constructor_name <- paste0("new_dkstat_", map_level)

  # `mode = "function"` skips any non-function object with the same name;
  # get0() returns NULL instead of erroring when nothing matches.
  constructor <- get0(constructor_name, mode = "function")

  if (is.null(constructor)) {
    warning(
      "No class constructor found for map level '", map_level,
      "'; returning data without a geographic class.",
      call. = FALSE
    )
    return(identity)
  }

  constructor
}
3 changes: 2 additions & 1 deletion R/dst_get_all_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#' not need to specify anything other than a table name and you will be given
#' the entire contents of the table in a nice long format. This is useful for
#' you, if you would like to filter the table with e.g. `{dplyr}` functions or
#' save the entire table for archival.
#' save the entire table for archival. If the table is larger than the max
#' 1.000.000 cells, then you will have to use `dst_get_data()`.
#'
#' @export
#' @inheritParams dst_get_data
Expand Down
4 changes: 3 additions & 1 deletion R/dst_get_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,7 @@ dst_get_data <- function(table,
dst_data$TID <- dst_date_parse(dst_date = dst_data$TID)
}

return(dst_data)
data <- determine_geo_properties(table, dst_data)

return(data)
}
10 changes: 8 additions & 2 deletions R/dst_meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
#' @param table The name of the table you want meta data for.
#' @param ... Ignored.
#' @param lang You can choose "en" for english or "da" for danish.
#' @param geo You can choose if the function should return the geographic
#' properties for the metadata. Mostly for internal use.
#' @export
dst_meta <- function(table, ..., lang = "da") {
dst_meta <- function(table, ..., lang = "da", geo = FALSE) {
## Create and parse url
dkstat_url <- paste0("http://api.statbank.dk/v1/tableinfo/", table, "?")
dkstat_url <- httr::parse_url(url = dkstat_url)
Expand All @@ -27,7 +29,11 @@ dst_meta <- function(table, ..., lang = "da") {
)

## Structure results
meta <- dst_meta_parse(meta, lang)
if (isFALSE(geo)) {
meta <- dst_meta_parse(meta, lang)
} else if (isTRUE(geo)) {
meta <- dst_meta_map(meta, lang)
}

return(meta)
}
32 changes: 32 additions & 0 deletions R/dst_meta_map.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#' Extract the map information from StatBank table metadata
#'
#' @description Adapted from `dst_meta` to only retrieve the geographic
#'   labels. Works on the table metadata retrieved from the StatBank
#'   (http://www.statistikbanken.dk/statbank5a/ or http://www.dst.dk) and
#'   keeps the mapping information for the variables that have it.
#'
#'   `lang` is accepted but not used by this function.
#'
#' @inheritParams dst_meta_parse
#' @return `NULL` when the variables carry no `map` column. Otherwise a list
#'   with `basics` (the `id` and `text` elements of `meta`) and `variables`
#'   (the `id`, `text` and `map` columns, restricted to complete rows).
#' @importFrom stats na.omit
#' @noRd
dst_meta_map <- function(meta, lang) {
  var_info <- meta[["variables"]]

  # Without a `map` column there is no geographic information to report.
  if (!("map" %in% colnames(var_info))) {
    return(NULL)
  }

  # Table-level basics: the table name/id and the short text describing it.
  is_basic <- names(meta) %in% c("id", "text")

  # Keep only the rows where id, text and map are all present.
  mapped <- na.omit(var_info[, c("id", "text", "map")])

  list(
    basics = meta[is_basic],
    variables = mapped
  )
}
102 changes: 102 additions & 0 deletions R/s3_constructors.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# s3_constructors.R
# This file defines all the s3 constructors needed for the appropriate dispatch
# of geographic matching in `{geodk}` (or other packages that might benefit from
# knowing the geographic type). The class is assigned "after the fact", as
# Wickham calls it, ensuring that the usual behaviour of a data.frame is
# preserved for all the functions that don't know about these special classes.

# Shared implementation for all `new_dkstat_*()` constructors.
#
# Arguments:
#   x    A data.frame (anything else is rejected).
#   map  Map level name; the class added is `dkstat_<map>`.
#
# Returns `x` with `dkstat_<map>` as its first class, followed by the classes
# it already had. `union()` keeps the result free of duplicates, so applying
# the same constructor twice leaves the class vector unchanged.
add_dkstat_class <- function(x, map) {
  stopifnot(is.data.frame(x))
  class(x) <- union(paste0("dkstat_", map), class(x))
  x
}

# One constructor per map level. The function names must stay
# `new_dkstat_<map level>` because they are looked up by name from the map
# level reported in the table metadata.

new_dkstat_KOMGRP <- function(x) {
  add_dkstat_class(x, "KOMGRP")
}

new_dkstat_kom_omraade <- function(x) {
  add_dkstat_class(x, "kom_omraade")
}

new_dkstat_Denmark_municipality_07 <- function(x) {
  add_dkstat_class(x, "Denmark_municipality_07")
}

new_dkstat_Verden_dk2 <- function(x) {
  add_dkstat_class(x, "Verden_dk2")
}

new_dkstat_denmark_cities_19 <- function(x) {
  add_dkstat_class(x, "denmark_cities_19")
}

new_dkstat_denmark_parish_23_4c <- function(x) {
  add_dkstat_class(x, "denmark_parish_23_4c")
}

new_dkstat_denmark_municipalitygroups_24 <- function(x) {
  add_dkstat_class(x, "denmark_municipalitygroups_24")
}

new_dkstat_Denmark_region_07 <- function(x) {
  add_dkstat_class(x, "Denmark_region_07")
}

new_dkstat_Denmark_rural_07 <- function(x) {
  add_dkstat_class(x, "Denmark_rural_07")
}

new_dkstat_denmark_multimember_constituency_23 <- function(x) {
  add_dkstat_class(x, "denmark_multimember_constituency_23")
}

new_dkstat_denmark_deanary_23 <- function(x) {
  add_dkstat_class(x, "denmark_deanary_23")
}

new_dkstat_europe_dk <- function(x) {
  add_dkstat_class(x, "europe_dk")
}

new_dkstat_Verden_dk <- function(x) {
  add_dkstat_class(x, "Verden_dk")
}

new_dkstat_Europa_DK3 <- function(x) {
  add_dkstat_class(x, "Europa_DK3")
}

new_dkstat_Denmark_county <- function(x) {
  add_dkstat_class(x, "Denmark_county")
}

new_dkstat_Verden_dk4 <- function(x) {
  add_dkstat_class(x, "Verden_dk4")
}
Loading
Loading