Skip to content

Commit

Permalink
Improve the way 'municipality' data is handled
Browse files Browse the repository at this point in the history
What a 'municipality' is, varies from country to
country. Strictly speaking, a Finnish
'municipality', in the Darwin Core standard, is
a 'county'. But in the USA for example a
'municipality' is a level of organisation below
the county level.

These changes account for these
differences by adding a level below 'municipality'
in the Finnish sense which is called 'local_area'
(or 'municipality' in DwC). It is assumed that
non-Finnish data will use the terms 'county' and
and 'municipality' in the raw data (e.g., data
coming from the Kotka CMS) in the sense they are
outline in Darwin Core. And Finnish data will map
'municipality' to DwC 'county' lacking any true
'municipality' data.
  • Loading branch information
wkmor1 committed Nov 4, 2024
1 parent 56b4132 commit c1d5a3c
Show file tree
Hide file tree
Showing 8 changed files with 482 additions and 346 deletions.
95 changes: 94 additions & 1 deletion R/finbif_occurrence.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,10 @@ occurrence <- function(fb_records_obj) {

fb_occurrence_df <- compute_region(fb_occurrence_df)

fb_occurrence_df <- compute_municipality(fb_occurrence_df)

fb_occurrence_df <- compute_local_area(fb_occurrence_df)

fb_occurrence_df <- compute_codes(fb_occurrence_df)

fb_occurrence_df <- extract_facts(fb_occurrence_df)
Expand Down Expand Up @@ -1436,6 +1440,91 @@ compute_region <- function(fb_occurrence_df) {

#' @noRd

compute_municipality <- function(fb_occurrence_df) {

dwc <- attr(fb_occurrence_df, "dwc", TRUE)

vtype <- col_type_string(dwc)

var_names <- sysdata(list(which = "var_names"))

m_var <- var_names[["computed_var_municipality", vtype]]

add <- attr(fb_occurrence_df, "include_new_cols", TRUE)

if (add && m_var %in% attr(fb_occurrence_df, "column_names", TRUE)) {

idv <- var_names[["gathering.interpretations.finnishMunicipality", vtype]]

id <- basename(fb_occurrence_df[[idv]])

municipality <- finbif_metadata(
"municipality",
attr(fb_occurrence_df, "locale", TRUE),
attr(fb_occurrence_df, "cache", TRUE)[[2L]]
)

fact_names <- var_names[["gathering.facts.fact", vtype]]

fact_values <- var_names[["gathering.facts.value", vtype]]

which_county <- lapply(
fb_occurrence_df[[fact_names]],
match,
"http://tun.fi/MY.county",
nomatch = 0
)

county <- mapply(
extract_fact,
fb_occurrence_df[[fact_values]],
lapply(which_county, as.logical),
SIMPLIFY = FALSE,
USE.NAMES = FALSE
)

fb_occurrence_df[[m_var]] <- ifelse(
is.na(id), unlist(county), municipality[id, "name"]
)

}

fb_occurrence_df

}

#' @noRd

compute_local_area <- function(fb_occurrence_df) {

dwc <- attr(fb_occurrence_df, "dwc", TRUE)

vtype <- col_type_string(dwc)

var_names <- sysdata(list(which = "var_names"))

la_var <- var_names[["computed_var_local_area", vtype]]

add <- attr(fb_occurrence_df, "include_new_cols", TRUE)

if (add && la_var %in% attr(fb_occurrence_df, "column_names", TRUE)) {

idv <- var_names[["gathering.interpretations.finnishMunicipality", vtype]]

lav <- var_names[["gathering.municipality", vtype]]

fb_occurrence_df[[la_var]] <- ifelse(
is.na(fb_occurrence_df[[idv]]), fb_occurrence_df[[lav]], NA_character_
)

}

fb_occurrence_df

}

#' @noRd

multi_req <- function(fb_records_obj) {

filters <- fb_records_obj[["filter"]]
Expand Down Expand Up @@ -1662,10 +1751,14 @@ extract_facts <- function(fb_occurrence_df) {

values_name <- var_names[[level_vls, vtype]]

fb_occurrence_df[[fact]] <- mapply(
fact_value <- mapply(
extract_fact, fb_occurrence_df[[values_name]], is_fact
)

fb_occurrence_df[[fact]] <- mapply(
concat_two_strings, fb_occurrence_df[[fact]], fact_value
)

}

}
Expand Down
69 changes: 44 additions & 25 deletions R/finbif_records.R
Original file line number Diff line number Diff line change
Expand Up @@ -607,28 +607,28 @@ infer_selection <- function(fb_records_obj) {

infer_computed_vars <- function(fb_records_obj) {

l <- list(
computable_vars <- list(
abundance = list(
vars = c(
"abundance", "individualCount", "occurrence_status", "occurrenceStatus"
"computed_var_abundance", "computed_var_occurrence_status"
),
v_names = c(
select_names = c(
"unit.interpretations.individualCount", "unit.abundanceString"
)
),
cu = list(
vars = c("coordinates_uncertainty", "coordinateUncertaintyInMeters"),
v_names = c(
vars = "computed_var_coordinates_uncertainty",
select_names = c(
"gathering.interpretations.coordinateAccuracy", "document.sourceId"
)
),
citation = list(
vars = c("citation", "bibliographicCitation"),
v_names = c("document.documentId", "document.sourceId")
vars = "computed_var_citation",
select_names = c("document.documentId", "document.sourceId")
),
sn = list(
vars = c("scientific_name", "scientificName"),
v_names = c(
vars = "computed_var_scientific_name",
select_names = c(
"unit.linkings.taxon.scientificName",
"unit.taxonVerbatim",
"unit.linkings.taxon.scientificNameAuthorship",
Expand All @@ -637,34 +637,49 @@ infer_computed_vars <- function(fb_records_obj) {
)
),
red_list = list(
vars = c("red_list_status", "redListStatus"),
v_names = c(
vars = "computed_var_red_list_status",
select_names = c(
"unit.linkings.taxon.latestRedListStatusFinland.status",
"unit.linkings.taxon.latestRedListStatusFinland.year"
)
),
region = list(
vars = c("region", "stateProvince"),
v_names = c(
vars = "computed_var_region",
select_names = c(
"gathering.interpretations.finnishMunicipality",
"gathering.province"
)
),
institution_code = list(
vars = c("institution_code", "institutionCode"),
v_names = "document.collectionId"
vars = "computed_var_institution_code",
select_names = "document.collectionId"
),
collection_code = list(
vars = c("collection_code", "collectionCode"),
v_names = "document.collectionId"
vars = "computed_var_institution_code",
select_names = "document.collectionId"
),
country = list(
vars = "country",
v_names = c("gathering.interpretations.country", "gathering.country")
vars = "computed_var_country",
select_names = c("gathering.interpretations.country", "gathering.country")
),
country_code = list(
vars = c("country_code", "countryCode"),
v_names = c("gathering.interpretations.country", "gathering.country")
vars = "computed_var_country_code",
select_names = c("gathering.interpretations.country", "gathering.country")
),
municipality = list(
vars = "computed_var_municipality",
select_names = c(
"gathering.interpretations.finnishMunicipality",
"gathering.facts.fact",
"gathering.facts.value"
)
),
local_area = list(
"computed_var_local_area",
select_names = c(
"gathering.interpretations.finnishMunicipality",
"gathering.municipality"
)
)
)

Expand All @@ -674,13 +689,17 @@ infer_computed_vars <- function(fb_records_obj) {

var_type <- fb_records_obj[["var_type"]]

for (i in l) {
for (i in computable_vars) {

i_vars <- i[["vars"]]

i_var_names <- var_names[i_vars, var_type]

if (any(i[["vars"]] %in% select)) {
if (any(i_var_names %in% select)) {

v_names_i <- i[["v_names"]]
select_names_i <- i[["select_names"]]

select <- c(select, var_names[v_names_i, var_type])
select <- c(select, var_names[select_names_i, var_type])

}

Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
8 changes: 8 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,14 @@ concat_string <- function(x) {

#' @noRd

concat_two_strings <- function(x, y) {

concat_string(c(x, y))

}

#' @noRd

cast_to_type <- function(
x,
type
Expand Down
3 changes: 2 additions & 1 deletion data-raw/variables.csv
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ computed_var_from_id_taxon_rank,taxon_rank,taxonRank,FALSE,FALSE,character,chara
unit.abundanceString,abundance_verbatim,verbatimAbundance,FALSE,FALSE,character,character,TRUE,TRUE,TRUE,TRUE,FALSE,FALSE,TRUE,FALSE,FALSE
gathering.interpretations.countryDisplayname,country_interpreted,countryInterpreted,FALSE,FALSE,character,character,TRUE,TRUE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE
gathering.interpretations.biogeographicalProvinceDisplayname,bio_province_interpreted,bioStateProvinceInterpreted,FALSE,FALSE,character,character,TRUE,TRUE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE
computed_var_from_id_municipality,municipality,county,FALSE,FALSE,character,character,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE,TRUE,FALSE,FALSE
computed_var_municipality,municipality,county,FALSE,FALSE,character,character,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE,TRUE,FALSE,FALSE
computed_var_local_area,local_area,municipality,FALSE,FALSE,character,character,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE
computed_var_region,region,stateProvince,FALSE,FALSE,character,character,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE,TRUE,FALSE,FALSE
gathering.conversions.wgs84WKT,footprint_wgs84,footprintWKT,FALSE,FALSE,character,character,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
gathering.conversions.linelengthInMeters,line_length_m,lineLengthMeters,FALSE,FALSE,integer,integer,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE,TRUE,FALSE,FALSE
Expand Down
Loading

0 comments on commit c1d5a3c

Please sign in to comment.