diff --git a/DESCRIPTION b/DESCRIPTION index 84be6ed..82623bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,23 +20,22 @@ Suggests: roxygen2, rmarkdown, dplyr, - curl, - glue, tibble, - sf + s2 Remotes: - geomarker-io/cincy, - cole-brokamp/dpkg + geomarker-io/cincy Config/testthat/edition: 3 URL: https://github.com/geomarker-io/codec, http://geomarker.io/codec/ BugReports: https://github.com/geomarker-io/codec/issues Imports: rlang, + tidyselect, stringr, - purrr (>= 1.0.0), - cincy (>= 1.1.0), - dpkg (>= 0.5.1) + glue, + dpkg, + sf, + cincy Depends: R (>= 2.10) LazyData: true diff --git a/NAMESPACE b/NAMESPACE index cc1aa49..b3b3478 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,15 @@ # Generated by roxygen2: do not edit by hand export(as_codec_dpkg) +export(cincy_census_geo) +export(cincy_city_geo) +export(cincy_county_geo) +export(cincy_neighborhood_geo) +export(cincy_zcta_geo) +export(codec_as_sf) export(codec_colors) -export(codec_dpkg_as_sf) -export(codec_dpkg_s3_put) +export(codec_interpolate) export(get_codec_dpkg) +export(install_cagis_data) +export(is_codec_dpkg) import(cincy) diff --git a/R/cincy_geographies.R b/R/cincy_geographies.R new file mode 100644 index 0000000..8ac9623 --- /dev/null +++ b/R/cincy_geographies.R @@ -0,0 +1,180 @@ +#' Cincy census tracts and block groups +#' +#' Read tract and block group ("bg") geographies from the online Census +#' [TIGER/Line](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html) +#' files into R +#' @param geography which type of cincy census geography to return +#' @param vintage a character vector of a year corresponding to the vintage of TIGER/Line data +#' @details +#' Compressed shapefiles are downloaded from TIGER into an R user data directory and will be cached +#' for use across other R sessions (see `?dpkg::stow` for more details). 
+#' @returns a simple features object with a geographic identifier column (`geoid`) +#' and a geometry column (`s2_geography`) +#' @export +#' @examples +#' cincy_census_geo("tract", "2024") +#' cincy_census_geo("tract", "2020") +#' cincy_census_geo("tract", "2019") +#' cincy_census_geo("bg", "2020") +#' cincy_census_geo("bg", "2019") +cincy_census_geo <- function(geography = c("tract", "bg"), vintage = as.character(2024:2013)) { + geography <- rlang::arg_match(geography) + vintage <- rlang::arg_match(vintage) + tiger_url <- glue::glue( + "https://www2.census.gov/geo/tiger/TIGER{vintage}", + "/{toupper(geography)}/tl_{vintage}_39_{geography}.zip" + ) + tiger_local <- dpkg::stow_url(tiger_url) + out <- + sf::read_sf(glue::glue("/vsizip/", tiger_local), + query = glue::glue("SELECT GEOID FROM tl_{vintage}_39_{geography} WHERE COUNTYFP = '061'") + ) + names(out) <- tolower(names(out)) + out$s2_geography <- sf::st_as_s2(out$geometry) + out <- sf::st_drop_geometry(out) + out <- sf::st_as_sf(out) + return(out) +} + +#' Cincy county +#' @rdname cincy_census_geo +#' @returns for `cincy_county_geo()`, the s2 geography returned by `sf::st_as_s2()` for the county with GEOID 39061 (not a simple features object) +#' @export +#' @examples +#' cincy_county_geo("2024") +cincy_county_geo <- function(vintage = as.character(2024:2013)) { + vintage <- rlang::arg_match(vintage) + tiger_url <- glue::glue("https://www2.census.gov/geo/tiger/TIGER{vintage}/COUNTY/tl_{vintage}_us_county.zip") + tiger_local <- dpkg::stow_url(tiger_url) + out <- + sf::read_sf(glue::glue("/vsizip/", tiger_local), + query = glue::glue("SELECT GEOID FROM tl_{vintage}_us_county WHERE GEOID = '39061'") + ) + return(sf::st_as_s2(out$geometry)) +} + +#' Install CAGIS GIS database +#' +#' This installs the CAGIS Open Data GIS database (`.gdb`) into the data +#' directory for the codec package. Once downloaded, it will be reused +#' across R sessions on the same computer. +#' The geodatabase contains many +#' [layers](https://www.cagis.org/Opendata/Quarterly_GIS_Data/OpenData_Layer_List.txt) that are +#' updated quarterly. 
(Historical geodatabases are not available here.) +#' @seealso This function is called by `cincy_neighborhood_geo()`, `cincy_city_geo()` +#' and others that import individual layers. +#' @param cagis_data_url the url to the CAGIS Open Data .gdb.zip file; this changes quarterly, so +#' [check](https://www.cagis.org/Opendata/Quarterly_GIS_Data) for something more recent if the file cannot be found +#' @returns the file path of the installed geodatabase; an error is raised if it is not present after the archive is extracted +#' @export +#' @examples +#' options(timeout = max(2500, getOption("timeout")), download.file.method = "libcurl") +#' install_cagis_data() +#' sf::st_layers(install_cagis_data())$name +install_cagis_data <- function(cagis_data_url = "https://www.cagis.org/Opendata/Quarterly_GIS_Data/CAGISOpenDataQ4_2024.gdb.zip") { + cagis_gdb_name <- tools::file_path_sans_ext(basename(cagis_data_url)) + dest <- file.path(tools::R_user_dir(package = "codec", "data"), cagis_gdb_name) + if (file.exists(dest)) { + return(dest) + } + tmp <- tempfile(fileext = ".zip") + # remove the downloaded archive on exit, whether or not extraction succeeds + on.exit(unlink(tmp), add = TRUE) + utils::download.file(cagis_data_url, destfile = tmp, mode = "wb") + utils::unzip(tmp, exdir = dirname(dest)) + # fail here rather than return a path that later readers cannot open + if (!file.exists(dest)) { + rlang::abort(paste0("expected to find ", cagis_gdb_name, " after extracting the archive downloaded from ", cagis_data_url)) + } + return(dest) +} + +#' Cincy neighborhood geographies +#' +#' CAGIS data (see `install_cagis_data()`) provides community council boundaries, but these boundaries can +#' overlap and do not align with census geographies or ZIP codes. +#' By default, the statistical neighborhood approximations are instead returned, +#' which are calculated by aggregating census tracts into 50 matching neighborhoods. 
+#' @param geography which type of cincy neighborhood geography to return +#' @returns a simple features object with a geographic identifier column (`geoid`) +#' and a geometry column (`s2_geography`) +#' @export +#' @examples +#' cincy_neighborhood_geo("statistical_neighborhood_approximations") +#' cincy_neighborhood_geo("community_council") +cincy_neighborhood_geo <- function(geography = c("statistical_neighborhood_approximations", "community_council")) { + geography <- rlang::arg_match(geography) + if (geography == "statistical_neighborhood_approximations") { + noi <- c("Cincinnati_Statistical_Neighborhood_Approximations" = "SNA_NAME") + } + if (geography == "community_council") { + noi <- c("Cincinnati_Community_Council_Neighborhoods" = "NEIGH") + } + d <- sf::st_read(install_cagis_data(), names(noi), quiet = TRUE) + out <- tibble::tibble( + geoid = sf::st_drop_geometry(d)[, noi], + s2_geography = sf::st_as_s2(sf::st_cast(sf::st_zm(d$SHAPE), "MULTIPOLYGON")) + ) |> + sf::st_as_sf() + return(out) +} + +#' cincy_city_geo() +#' @export +#' @rdname cincy_neighborhood_geo +#' @returns for `cincy_city_geo()`, the s2 geography returned by `sf::st_as_s2()` for the `Cincinnati_City_Boundary` layer +#' @examples +#' cincy_city_geo() +cincy_city_geo <- function() { + cagis_db <- install_cagis_data() + out <- sf::st_read(cagis_db, layer = "Cincinnati_City_Boundary", quiet = TRUE) + return(sf::st_as_s2(out$SHAPE)) +} + +#' Cincy ZIP Code Tabulation Areas +#' +#' Read [ZIP Code Tabulation Areas +#' (ZCTAs)](https://www.census.gov/programs-surveys/geography/guidance/geo-areas/zctas.html) +#' geographies from the online Census +#' [TIGER/Line](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html) +#' files into R +#' @param vintage a character vector of a year corresponding to the vintage of TIGER/Line data +#' @export +#' @returns a simple features object with a geographic identifier column (`geoid`) +#' and a geometry column (`s2_geography`) +#' @examples +#' cincy_zcta_geo() +#' cincy_zcta_geo("2018") +cincy_zcta_geo <- function(vintage = as.character(2024:2013)) 
{ + vintage <- rlang::arg_match(vintage) + # vintages 2013-2019 use the ZCTA5 directory, zcta510 layer, and GEOID10 column; + # later vintages use the ZCTA520 directory, zcta520 layer, and GEOID20 column + if (vintage %in% as.character(2013:2019)) { + zcta_dir <- "ZCTA5" + zcta_layer <- glue::glue("tl_{vintage}_us_zcta510") + geoid_col <- "GEOID10" + } else { + zcta_dir <- "ZCTA520" + zcta_layer <- glue::glue("tl_{vintage}_us_zcta520") + geoid_col <- "GEOID20" + } + tiger_url <- glue::glue("https://www2.census.gov/geo/tiger/TIGER{vintage}/{zcta_dir}/{zcta_layer}.zip") + tiger_local <- dpkg::stow_url(tiger_url) + # quoted, comma-separated ZIP codes for the SQL IN clause + zip_code_list <- paste0("'", cincy_zip_codes, "'", collapse = ", ") + out <- + sf::read_sf(glue::glue("/vsizip/", tiger_local), + query = glue::glue("SELECT {geoid_col} FROM {zcta_layer} WHERE {geoid_col} IN ({zip_code_list})") + ) + # drop the vintage digits so the identifier column is always named `geoid` + names(out) <- gsub("[0-9]", "", tolower(names(out))) + out$s2_geography <- sf::st_as_s2(out$geometry) + out <- sf::st_drop_geometry(out) + out <- sf::st_as_sf(out) + return(out) +} + +# from cincy::zcta_tiger_2020 (version 1.1.0) on 2024-11-08 +cincy_zip_codes <- + c( + "45214", "45208", "45236", "45247", "45225", "45205", "45220", + "45206", "45223", "45232", "45174", "45207", "45209", "45212", + "45213", "45217", "45218", "45229", "45238", "45242", "45051", + "45002", "45227", "45211", "45215", "45216", "45219", "45224", + "45033", "45237", "45239", "45248", "45041", "45267", "45030", + "45252", "45244", "45202", "45249", "45255", "45226", "45203", + "45246", "45111", "45147", "45052", "45240", "45241", "45243", + "45251", "45001", "45204", "45231", "45230", "45233" + ) diff --git a/R/cincy_interpolate.R b/R/cincy_interpolate.R new file mode 100644 index 0000000..7fda6c0 --- /dev/null +++ b/R/cincy_interpolate.R @@ -0,0 +1,128 @@ +#' Coerce CoDEC data package into a simple features object +#' +#' The name of the census tract column in the CoDEC data package is used to add +#' the appropriate cincy geography. 
+#' @param x a CoDEC data package +#' @details Tract identifiers do not change across decennial censuses, but the digital representation of their boundaries +#' may be improved over time. Here, data packages using 2010 tract identifiers use the TIGER/Line 2019 tract shapefiles +#' and data packages using 2020 tract identifiers use the TIGER/Line 2020 tract shapefiles +#' @returns a simple features object with a geographic identifier column (`geoid`) +#' and a geometry column (`s2_geography`) in addition to the columns in `x` +#' @export +#' @examples +#' codec_as_sf(get_codec_dpkg("property_code_enforcements-v0.2.0")) +codec_as_sf <- function(x) { + if (!is_codec_dpkg(x)) rlang::abort("x must be a CoDEC data package") + # match the column name exactly so that other columns merely containing the same text are ignored + if ("census_tract_id_2010" %in% names(x)) { + codec_tract_id_name <- "census_tract_id_2010" + tiger_vintage <- "2019" + } else { + codec_tract_id_name <- "census_tract_id_2020" + tiger_vintage <- "2020" + } + gd <- + cincy_census_geo("tract", tiger_vintage) |> + dplyr::left_join(x, by = c("geoid" = codec_tract_id_name)) + return(gd) +} + +#' Spatially interpolate community-level data +#' +#' Census block-level weights are used to spatially interpolate CoDEC data packages from 2020 +#' census tract geographies to 2020 neighborhoods (`neigh`), ZIP code tabulation areas (`zcta`), or block groups (`bg`). +#' @param from a CoDEC data package +#' @param to character string naming a target geography +#' @param weights which census block-level weights to use; see details +#' @returns a tibble with a new geographic identifier column for the `to` target geography (`geoid`) +#' in addition to the (interpolated) columns in `from` +#' @details +#' Block-level total population (`pop`), total number of homes (`homes`), or total land area (`area`) +#' from the 2020 Census can be chosen to use for the weights. +#' Geospatial intersection happens after transforming geographies to epsg:5072. 
+#' See `codec_as_sf()` for adding geography to a CoDEC data package. +#' Variables beginning with "n_" are interpolated using a weighted sum; +#' all other variables are interpolated using a weighted mean. +#' Only data packages with a `census_tract_id_2020` column are currently supported. +#' @export +#' @examples +#' codec_interpolate(from = get_codec_dpkg("acs_measures-v0.1.0")) +#' # TODO codec_interpolate(from = get_codec_dpkg("property_code_enforcements-v0.2.0")) +codec_interpolate <- function(from, to = c("neigh", "zcta", "bg"), weights = c("pop", "homes", "area")) { + to <- rlang::arg_match(to) + weights <- rlang::arg_match(weights) + # the join below is keyed on census_tract_id_2020, so fail early with a clear message + if (!"census_tract_id_2020" %in% names(from)) { + rlang::abort("from must contain a census_tract_id_2020 column; other tract identifier vintages are not yet supported") + } + from_sf <- + from |> + codec_as_sf() |> + dplyr::slice_sample(n = 1, by = "geoid") |> + dplyr::select(geoid) |> + sf::st_transform(5072) + + if (to == "zcta") { + to_sf <- cincy_zcta_geo("2020") + } else if (to == "neigh") { + to_sf <- cincy_neighborhood_geo("statistical_neighborhood_approximations") + } else if (to == "bg") { + to_sf <- cincy_census_geo("bg", "2020") + } + to_sf <- sf::st_transform(to_sf, 5072) + + # `weights` is a character string after arg_match(), so select the column with all_of() + bw <- + cincy_block_weights() |> + sf::st_transform(5072) |> + dplyr::select(the_weight = tidyselect::all_of(weights), s2_geography) + + interpolation_weights <- + sf::st_intersection(dplyr::select(to_sf, geoid), dplyr::select(from_sf, geoid)) |> + dplyr::filter(sf::st_is(s2_geography, c("POLYGON", "MULTIPOLYGON", "GEOMETRYCOLLECTION"))) |> + sf::st_join(bw) |> + sf::st_drop_geometry() |> + dplyr::arrange(geoid) |> + stats::na.omit() |> + dplyr::filter(the_weight > 0) |> + dplyr::mutate(weight_coef = the_weight / sum(the_weight), .by = c("geoid")) |> + dplyr::summarize(weight = sum(weight_coef), .by = c("geoid", "geoid.1")) |> + suppressWarnings() + + out <- + from |> + dplyr::left_join(interpolation_weights, by = c("census_tract_id_2020" = "geoid.1")) |> + dplyr::group_by(geoid, year) |> + ## TODO + ## dplyr::left_join(interpolation_weights, by = c("census_tract_id_2010" = "geoid.1")) |> + ## dplyr::group_by(geoid, year, month) |> + ## NOTE(review): weights are normalized within each target geoid, so the "n_" weighted sum may behave as a weighted average of tract counts rather than an apportioned total; confirm this is intended + dplyr::summarize( + dplyr::across( 
c(-tidyselect::starts_with("n_"), -tidyselect::starts_with("census_tract_id_"), -weight), + \(x) stats::weighted.mean(x, weight, na.rm = TRUE) + ), + dplyr::across(tidyselect::starts_with("n_"), \(x) sum(x * weight, na.rm = TRUE)) + ) |> + dplyr::ungroup() + + return(out) +} + +# Internal helper: reads the 2020 TIGER/Line census blocks for state 39, county 061, +# reduces each block to a point on its surface after transforming to epsg:5072, and returns +# an sf object with `pop`, `homes`, and `area` columns plus an `s2_geography` column +cincy_block_weights <- function() { + tiger_url <- "https://www2.census.gov/geo/tiger/TIGER2020/TABBLOCK20/tl_2020_39_tabblock20.zip" + tiger_local <- dpkg::stow_url(tiger_url) + rd <- + sf::read_sf(glue::glue("/vsizip/", tiger_local), + query = glue::glue("SELECT GEOID20,ALAND20,HOUSING20,POP20 FROM tl_2020_39_tabblock20 WHERE COUNTYFP20 = '061'") + ) + out <- + rd |> + sf::st_transform(5072) |> + sf::st_point_on_surface() |> + suppressWarnings() |> + dplyr::select(pop = POP20, homes = HOUSING20, area = ALAND20) + out$s2_geography <- sf::st_as_s2(out$geometry) + out <- sf::st_drop_geometry(out) + out <- sf::st_as_sf(out) + return(out) +} + +# column names referenced through non-standard evaluation in this file +utils::globalVariables(c( + "POP20", "HOUSING20", "ALAND20", + "geoid", "pop", "s2_geography", + "the_weight", "weight_coef", + "year", "month", "weight" +)) diff --git a/R/codec_dpkg.R b/R/codec_dpkg.R index 953c9df..071c10b 100644 --- a/R/codec_dpkg.R +++ b/R/codec_dpkg.R @@ -1,4 +1,4 @@ -#' Read a dpkg from the public CoDEC repository into R +#' Read a dpkg from CoDEC into R #' #' Public data packages are downloaded from `gh://geomarker-io/codec/` using #' `dpkg::stow()` to cache a local copy in the user's data directory. @@ -17,55 +17,18 @@ get_codec_dpkg <- function(codec_dpkg, overwrite = FALSE) { return(out) } - -#' Convert a CoDEC dpkg into an sf object -#' -#' The required census tract identifier column name is used to merge in tract geographies -#' from the [cincy](https://geomarker.io/cincy) package. 
-#' -#' @param x a CoDEC dpkg -#' @returns an `sf` object that is a codec dpkg with an added `geometry` column -#' @export -#' @examples -#' get_codec_dpkg("drivetime-v0.2.2") |> -#' codec_dpkg_as_sf() -codec_dpkg_as_sf <- function(x) { - x <- as_codec_dpkg(x, - name = attr(x, "name"), - version = attr(x, "version"), - title = attr(x, "title"), - description = attr(x, "description"), - homepage = attr(x, "homepage") - ) - census_tract_id_names <- paste0("census_tract_id", c("_2000", "_2010", "_2020")) - census_tract_id_name <- census_tract_id_names[census_tract_id_names %in% names(x)] - census_tract_id_year <- stringr::str_extract(census_tract_id_name, "[0-9]+") - census_tract_geo <- - parse(text = paste0("cincy::tract_tigris_", census_tract_id_year)) |> - eval() - rlang::check_installed("sf", "return sf objects") - out <- - dplyr::left_join(x, census_tract_geo, by = census_tract_id_name) |> - sf::st_as_sf() - return(out) -} - - -#' as_codec_dpkg -#' -#' Convert a tibble to a data package (`dpkg`) object in R while checking it -#' against CoDEC data specifications: -#' +#' Convert a tibble to a CoDEC data package +#' +#' **CoDEC Specifications:** #' 1. The data must include a [census tract](https://www2.census.gov/geo/pdfs/education/CensusTracts.pdf) -#' identifier column (i.e., `census_tract_id_2000`, `census_tract_id_2010`, or `census_tract_id_2020`). +#' identifier column (i.e., `census_tract_id_2010`, or `census_tract_id_2020`). #' The column must contain 11-digit #' [GEOID](https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html) #' identifiers for every census tract in Hamilton County, OH. #' 2. The data includes a year column (`year`), an integer year representing the #' vintage of the data (e.g. `2021`). #' The data can optionally include a month column (`month`), an integer month of the year. -#' -#' Data must be structured in a tidy format such that each row is an observation +#' 3. 
Data must be structured in a tidy format such that each row is an observation #' for a specific census tract at a specific year (and month). #' #' @param x data.frame or tibble meeting CoDEC data specifications above @@ -74,7 +37,7 @@ codec_dpkg_as_sf <- function(x) { #' @param title see `dpkg::as_dpkg()` #' @param description see `dpkg::as_dpkg()` #' @param homepage see `dpkg::as_dpkg()` -#' @returns a dpkg object +#' @returns for as_codec_dpkg, a dpkg object #' @export as_codec_dpkg <- function(x, name, version, title = character(), description = character(), homepage = character()) { chk1 <- check_census_tract_id(x) @@ -85,38 +48,63 @@ as_codec_dpkg <- function(x, name, version, title = character(), description = c return(out) } +#' is_codec_dpkg +#' @rdname as_codec_dpkg +#' @returns for is_codec_dpkg, a logical +#' @export +#' @examples +#' is_codec_dpkg(mtcars) +is_codec_dpkg <- function(x) { + # metadata extraction and tibble coercion run inside the handler so that + # any failure on arbitrary input yields FALSE instead of an error + out <- + tryCatch( + { + list_dpkg <- dpkg::dpkg_meta(x) + list_dpkg$x <- tibble::as_tibble(x) + do.call(as_codec_dpkg, list_dpkg) + TRUE + }, + error = function(e) FALSE + ) + return(out) +} + check_census_tract_id <- function(x) { - census_tract_id_names <- paste0("census_tract_id", c("_2000", "_2010", "_2020")) + census_tract_id_names <- paste0("census_tract_id", c("_2010", "_2020")) # has census_tract_id_{year} or census_tract_id column if (sum(names(x) %in% census_tract_id_names) != 1) { - return("must contain one census tract id column called census_tract_id_2000, census_tract_id_2010, or census_tract_id_2020") + return("must contain one census tract id column called census_tract_id_2010 or census_tract_id_2020") } census_tract_id_name <- census_tract_id_names[census_tract_id_names %in% names(x)] census_tract_id_year <- stringr::str_extract(census_tract_id_name, "[0-9]+") - required_census_tract_ids <- - parse(text = paste0("cincy::tract_tigris_", census_tract_id_year)) |> - eval() |> - purrr::pluck(paste0("census_tract_id_", census_tract_id_year)) + if 
(census_tract_id_year == "2010") { + required_census_tract_ids <- cincy_census_geo("tract", "2019")$geoid + } else if (census_tract_id_year == "2020") { + required_census_tract_ids <- cincy_census_geo("tract", "2020")$geoid + } if (!all(required_census_tract_ids %in% x[[census_tract_id_name]])) { - return(glue::glue("the census tract id column, {census_tract_id_name}, does not contain every census tract in `cincy::tract_tigris_{census_tract_id_year}`")) + return(glue::glue("the census tract id column, {census_tract_id_name},", + "does not contain every census tract for that vintage;", + "Check for missing census tract observations and", + "check that you are using the correct vintage.", + .sep = " " + )) } return(invisible(NULL)) } +# Validates the `year` column and, when present, the `month` column of `x`; +# returns a character string describing the first failed check, or invisible NULL when all checks pass check_date <- function(x) { - if (! "year" %in% names(x)) { + if (!"year" %in% names(x)) { return("must contain a 'year' column") } years <- unique(x$year) - if (! all(years %in% 1970:2099)) { + if (!all(years %in% 1970:2099)) { return("the 'year' field must only contain integer years between 1970 and 2099") } if ("month" %in% names(x)) { - if (! all(x$month %in% 1:12)) { + if (!all(x$month %in% 1:12)) { return("the 'month' field must only contain integer values 1-12") } } return(invisible(NULL)) } - diff --git a/R/codec_dpkg_s3.R b/R/codec_dpkg_s3.R deleted file mode 100644 index e7639fe..0000000 --- a/R/codec_dpkg_s3.R +++ /dev/null @@ -1,32 +0,0 @@ -#' Put a dpkg into the public CoDEC S3 bucket -#' -#' The [AWS CLI](https://aws.amazon.com/cli/) tool must be installed and authenticated to -#' write to `s3://geomarker-io/codec_data`. -#' The resulting data package will be available publicly. 
-#' @param x a data package (`dpkg`) object, ideally created with `as_codec_dpkg()` to ensure -#' the data meets CoDEC specifications -#' @returns character string URI of uploaded resource -#' @export -#' @examples -#' \dontrun{ -#' # use aws command line to login interactively via profile sso account" -#' system2("aws", c("sso", "login", "--profile", "geomarker-io")) -#' # make sure to set AWS_PROFILE so the AWS CLI tool knows to use it by default" -#' Sys.setenv("AWS_PROFILE" = "geomarker-io") -#' } -codec_dpkg_s3_put <- function(x) { - if (!inherits(x, "dpkg")) rlang::abort("x must be a dpkg object") - the_file <- dpkg::write_dpkg(x, tempdir()) - out <- - system2( - "aws", - c( - "s3", "cp", the_file, - glue::glue("s3://geomarker-io/codec_data/{attr(x, 'name')}-v{attr(x, 'version')}.parquet"), - "--acl public-read" - ) - ) - if (!out == 0L) rlang::abort("aws s3 cp command failed") - return(as.character(glue::glue("s3://geomarker-io/codec_data/{attr(x, 'name')}-v{attr(x, 'version')}.parquet"))) -} - diff --git a/inst/data_catalog.R b/inst/codec_catalog/app.R similarity index 100% rename from inst/data_catalog.R rename to inst/codec_catalog/app.R diff --git a/inst/codec_data/property_code_enforcements/property_code_enforcements.R b/inst/codec_data/property_code_enforcements/property_code_enforcements.R index 6358e85..e0916ef 100644 --- a/inst/codec_data/property_code_enforcements/property_code_enforcements.R +++ b/inst/codec_data/property_code_enforcements/property_code_enforcements.R @@ -7,45 +7,59 @@ message("Using CoDEC, version ", packageVersion("codec")) library(dplyr) library(dpkg) library(addr) +library(sf) options(arrow.unsafe_metadata = TRUE) -cagis_addr_data <- - addr::cagis_addr |> +addr_per_tract <- + addr::cagis_addr() |> mutate(cagis_s2 = purrr::map(cagis_addr_data, \(d) pull(d, cagis_s2))) |> select(-cagis_addr_data) |> tidyr::unnest(cols = c(cagis_s2)) |> filter(!is.na(cagis_s2)) |> distinct(cagis_addr, .keep_all = TRUE) |> 
mutate(census_tract_id_2010 = tract::get_census_tract_id(cagis_s2, year = "2010")) |> - distinct(cagis_addr, .keep_all = TRUE) - -addr_per_tract <- - cagis_addr_data |> + distinct(cagis_addr, .keep_all = TRUE) |> group_by(census_tract_id_2010) |> summarize(n_addr = n()) # read in parcel data resource property_code_enforcements <- - dpkg::stow("gh://geomarker-io/parcel/property_code_enforcements-v1.0.1") |> + dpkg::stow("gh://geomarker-io/parcel/property_code_enforcements-v1.1.1") |> dpkg::read_dpkg() |> + filter(!is.na(lon_jittered), !is.na(lat_jittered)) |> + st_as_sf(coords = c("lon_jittered", "lat_jittered"), crs = 4326) |> + st_transform(st_crs(cincy::tract_tigris_2010)) |> + st_join(cincy::tract_tigris_2010, largest = TRUE) |> + st_drop_geometry() |> mutate( year = lubridate::year(date), month = lubridate::month(date) ) |> - left_join(cagis_addr_data, by = "cagis_addr") |> group_by(census_tract_id_2010, year, month) |> summarize(n_violations = n()) |> filter(!is.na(census_tract_id_2010)) |> left_join(addr_per_tract, by = "census_tract_id_2010") |> mutate(violations_per_addr = n_violations/n_addr) +min_year_month <- + property_code_enforcements |> + ungroup() |> + arrange(year, month) |> + slice(1) + +max_year_month <- + property_code_enforcements |> + ungroup() |> + arrange(desc(year), desc(month)) |> + slice(1) + all_tracts <- cincy::tract_tigris_2010 |> sf::st_drop_geometry() |> as_tibble() |> mutate(date = list(seq.Date( - from = as.Date("2001-04-01"), - to = as.Date("2024-07-01"), + from = as.Date(glue::glue("{min_year_month$year}-{min_year_month$month}-01")), + to = as.Date(glue::glue("{max_year_month$year}-{max_year_month$month}-01")), by = "month" ))) |> tidyr::unnest(cols = c(date)) |> @@ -62,7 +76,7 @@ out_dpkg <- by = c("census_tract_id_2010", "year", "month")) |> as_codec_dpkg( name = "property_code_enforcements", - version = "0.1.0", + version = "0.2.0", title = "Property Code Enforcements", homepage = "https://geomarker.io/codec", description = 
paste(readLines(fs::path_package("codec", "codec_data", "property_code_enforcements", "README.md")), collapse = "\n") diff --git a/inst/codec_data/xx_address/README.md b/inst/codec_data/xx_address/README.md index beb1ce4..19ab792 100644 --- a/inst/codec_data/xx_address/README.md +++ b/inst/codec_data/xx_address/README.md @@ -2,6 +2,6 @@ [![latest github release for xx_address dpkg](https://img.shields.io/github/v/release/geomarker-io/codec?sort=date&filter=xx_address-*&display_name=tag&label=%5B%E2%98%B0%5D&labelColor=%238CB4C3&color=%23396175)](https://github.com/geomarker-io/codec/releases?q=xx_address&expanded=false) -Census tract-level measures of crime incidents (including property crimes, violent crimes, other crimes, and gunshots) in Hamilton County, Ohio. Tract-level measures are derived from the data packages stored in the [`xx_address` repository](https://github.com/geomarker-io/xx_address). Version 1.0.1 of the `xx_address` CoDEC data resource harmonizes [`crime_incidents-v0.1.1`](https://github.com/geomarker-io/xx_address/releases/tag/crime_incidents-v0.1.1) and [`shotspotter-v0.1.1`](https://github.com/geomarker-io/xx_address/releases/tag/shotspotter-v0.1.1). View the metadata for each of these data packages for more information about their sources. +Census tract-level measures of crime incidents (including property crimes, violent crimes, other crimes, gunshots, and reported shootings) in Hamilton County, Ohio. Tract-level measures are derived from the data packages stored in the [`xx_address` repository](https://github.com/geomarker-io/xx_address), including [`crime_incidents-v0.1.2`](https://github.com/geomarker-io/xx_address/releases/tag/crime_incidents-v0.1.2), [`shotspotter-v0.1.2`](https://github.com/geomarker-io/xx_address/releases/tag/shotspotter-v0.1.2), and [`reported_shootings-v0.1.0`](https://github.com/geomarker-io/xx_address/releases/tag/reported_shootings-v0.1.0). 
View the metadata for each of these data packages for more information about their sources. -Crime measures were geocoded to the street range, then aggregated to the tract level by summing the number of crimes for all streets that intersect the tract. If a street range overlaps more than one tract, the crimes are counted for the tract in which the majority of the street lies. +Jittered (within the same block) latitude and longitude corresponding to the location of each reported crime are available from each data source. Crimes are aggregated to the tract level by summing the number of crimes for each tract. For higher-resolution crime data, see the [`xx_address` repository](https://github.com/geomarker-io/xx_address). \ No newline at end of file diff --git a/inst/codec_data/xx_address/xx_address.R b/inst/codec_data/xx_address/xx_address.R index 6a8de5d..bb1c449 100644 --- a/inst/codec_data/xx_address/xx_address.R +++ b/inst/codec_data/xx_address/xx_address.R @@ -8,21 +8,23 @@ library(dplyr) library(sf) library(dpkg) library(geoarrow) +library(addr) options(arrow.unsafe_metadata = TRUE) crime_incidents <- - dpkg::stow("gh://geomarker-io/xx_address/crime_incidents-v0.1.1") |> + dpkg::stow("gh://geomarker-io/xx_address/crime_incidents-v0.1.2") |> arrow::read_parquet() |> - mutate(geometry = sf::st_as_sfc(geometry)) |> - st_as_sf(crs = 4326) |> + select(date_time, lon_jittered, lat_jittered, category) |> + filter(!is.na(lon_jittered), !is.na(lat_jittered)) |> + st_as_sf(coords = c("lon_jittered", "lat_jittered"), crs = 4326) |> st_transform(st_crs(cincy::tract_tigris_2010)) |> st_join(cincy::tract_tigris_2010, largest = TRUE) |> st_drop_geometry() |> mutate( year = lubridate::year(date_time), month = lubridate::month(date_time) - ) |> - select(-tlid, -address_x, -date_time) |> + ) |> + select(-date_time) |> group_by(census_tract_id_2010, year, month, category) |> tally()|> tidyr::pivot_wider( @@ -31,10 +33,11 @@ crime_incidents <- ) shotspotter <- - 
dpkg::stow("gh://geomarker-io/xx_address/shotspotter-v0.1.1") |> + dpkg::stow("gh://geomarker-io/xx_address/shotspotter-v0.1.2") |> arrow::read_parquet() |> - mutate(geometry = sf::st_as_sfc(geometry)) |> - st_as_sf(crs = 4326) |> + select(date_time, lon_jittered, lat_jittered) |> + filter(!is.na(lon_jittered), !is.na(lat_jittered)) |> + st_as_sf(coords = c("lon_jittered", "lat_jittered"), crs = 4326) |> st_transform(st_crs(cincy::tract_tigris_2010)) |> st_join(cincy::tract_tigris_2010, largest = TRUE) |> st_drop_geometry() |> @@ -42,18 +45,36 @@ shotspotter <- year = lubridate::year(date_time), month = lubridate::month(date_time) ) |> - select(-tlid, -address_x, -date_time) |> + select(-date_time) |> group_by(census_tract_id_2010, year, month) |> tally() |> rename(gunshots = n) +reported_shootings <- + dpkg::stow("gh://geomarker-io/xx_address/reported_shootings-v0.1.0") |> + arrow::read_parquet() |> + select(date, lon_jittered, lat_jittered) |> + filter(!is.na(lon_jittered), !is.na(lat_jittered)) |> + st_as_sf(coords = c("lon_jittered", "lat_jittered"), crs = 4326) |> + st_transform(st_crs(cincy::tract_tigris_2010)) |> + st_join(cincy::tract_tigris_2010, largest = TRUE) |> + st_drop_geometry() |> + mutate( + year = lubridate::year(date), + month = lubridate::month(date) + ) |> + select(-date) |> + group_by(census_tract_id_2010, year, month) |> + tally() |> + rename(reported_shootings = n) + all_tracts <- cincy::tract_tigris_2010 |> st_drop_geometry() |> as_tibble() |> mutate(date = list(seq.Date( from = as.Date("2011-01-01"), - to = as.Date("2024-06-01"), + to = as.Date("2024-11-01"), by = "month" ))) |> tidyr::unnest(cols = c(date)) |> @@ -64,10 +85,12 @@ all_tracts <- select(-date) d_out <- - left_join(all_tracts, crime_incidents, by = c("census_tract_id_2010", "year", "month")) |> - left_join(shotspotter, by = c("census_tract_id_2010", "year", "month")) |> + purrr::reduce( + .x = list(all_tracts, crime_incidents, shotspotter, reported_shootings), + .f = \(x, y) 
left_join(x, y, by = c("census_tract_id_2010", "year", "month")) + ) |> mutate( - across(c(property, violent, other, gunshots), + across(c(property:reported_shootings), \(x) ifelse(is.na(x), 0, x))) |> filter(!is.na(census_tract_id_2010)) @@ -75,7 +98,7 @@ out_dpkg <- d_out |> as_codec_dpkg( name = "xx_address", - version = "0.1.0", + version = "0.2.0", title = "Crime", homepage = "https://geomarker.io/codec", description = paste(readLines(fs::path_package("codec", "codec_data", "xx_address", "README.md")), collapse = "\n") diff --git a/justfile b/justfile index 6f07181..d643be0 100644 --- a/justfile +++ b/justfile @@ -7,3 +7,7 @@ release_data codec_dpkg_name: build_shiny: Rscript -e "shiny::runApp('./inst/codec_shiny', launch.browser = TRUE)" +# build and view shiny application +build_catalog: + Rscript -e "shiny::runApp('./inst/codec_catalog', launch.browser = TRUE)" + diff --git a/man/as_codec_dpkg.Rd b/man/as_codec_dpkg.Rd index 0996cc6..5d18725 100644 --- a/man/as_codec_dpkg.Rd +++ b/man/as_codec_dpkg.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/codec_dpkg.R \name{as_codec_dpkg} \alias{as_codec_dpkg} -\title{as_codec_dpkg} +\alias{is_codec_dpkg} +\title{Convert a tibble to a CoDEC data package} \usage{ as_codec_dpkg( x, @@ -12,6 +13,8 @@ as_codec_dpkg( description = character(), homepage = character() ) + +is_codec_dpkg(x) } \arguments{ \item{x}{data.frame or tibble meeting CoDEC data specifications above} @@ -27,24 +30,25 @@ as_codec_dpkg( \item{homepage}{see \code{dpkg::as_dpkg()}} } \value{ -a dpkg object +for as_codec_dpkg, a dpkg object + +for is_codec_dpkg, a logical } \description{ -Convert a tibble to a data package (\code{dpkg}) object in R while checking it -against CoDEC data specifications: -} -\details{ +\strong{CoDEC Specifications:} \enumerate{ \item The data must include a \href{https://www2.census.gov/geo/pdfs/education/CensusTracts.pdf}{census tract} -identifier column (i.e., \code{census_tract_id_2000}, \code{census_tract_id_2010}, or 
\code{census_tract_id_2020}). +identifier column (i.e., \code{census_tract_id_2010}, or \code{census_tract_id_2020}). The column must contain 11-digit \href{https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html}{GEOID} identifiers for every census tract in Hamilton County, OH. \item The data includes a year column (\code{year}), an integer year representing the vintage of the data (e.g. \code{2021}). The data can optionally include a month column (\code{month}), an integer month of the year. -} - -Data must be structured in a tidy format such that each row is an observation +\item Data must be structured in a tidy format such that each row is an observation for a specific census tract at a specific year (and month). } +} +\examples{ +is_codec_dpkg(mtcars) +} diff --git a/man/cincy_census_geo.Rd b/man/cincy_census_geo.Rd new file mode 100644 index 0000000..acbcd04 --- /dev/null +++ b/man/cincy_census_geo.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cincy_geographies.R +\name{cincy_census_geo} +\alias{cincy_census_geo} +\alias{cincy_county_geo} +\title{Cincy census tracts and block groups} +\usage{ +cincy_census_geo( + geography = c("tract", "bg"), + vintage = as.character(2024:2013) +) + +cincy_county_geo(vintage = as.character(2024:2013)) +} +\arguments{ +\item{geography}{which type of cincy census geography to return} + +\item{vintage}{a character vector of a year corresponding to the vintage of TIGER/Line data} +} +\value{ +a simple features object with a geographic identifier column (\code{geoid}) +and a geometry column (\code{s2_geography}) +} +\description{ +Read tract and block group ("bg") geographies from the online Census +\href{https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html}{TIGER/Line} +files into R +} +\details{ +Compressed shapefiles are downloaded from TIGER into an R user data directory and will be cached +for use across other R 
sessions (see \code{?dpkg::stow} for more details). +} +\examples{ +cincy_census_geo("tract", "2024") +cincy_census_geo("tract", "2020") +cincy_census_geo("tract", "2019") +cincy_census_geo("bg", "2020") +cincy_census_geo("bg", "2019") +cincy_county_geo("2024") +} diff --git a/man/cincy_neighborhood_geo.Rd b/man/cincy_neighborhood_geo.Rd new file mode 100644 index 0000000..ba83e37 --- /dev/null +++ b/man/cincy_neighborhood_geo.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cincy_geographies.R +\name{cincy_neighborhood_geo} +\alias{cincy_neighborhood_geo} +\title{Cincy neighborhood geographies} +\usage{ +cincy_neighborhood_geo( + geography = c("statistical_neighborhood_approximations", "community_council") +) +} +\arguments{ +\item{geography}{which type of cincy neighborhood geography to return} +} +\value{ +a simple features object with a geographic identifier column (\code{geoid}) +and a geometry column (\code{s2_geography}) +} +\description{ +CAGIS data (see \code{install_cagis_data()}) provides community council boundaries, but these boundaries can +overlap and do not align with census geographies or ZIP codes. +By default, the statistical neighborhood approximations are instead returned, +which are calculated by aggregating census tracts into 50 matching neighborhoods. 
+} +\examples{ +cincy_neighborhood_geo("statistical_neighborhood_approximations") +cincy_neighborhood_geo("community_council") +} diff --git a/man/cincy_neighorhood_geo.Rd b/man/cincy_neighorhood_geo.Rd new file mode 100644 index 0000000..8c1b294 --- /dev/null +++ b/man/cincy_neighorhood_geo.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cincy_geographies.R +\name{cincy_city_geo} +\alias{cincy_city_geo} +\title{cincy_city_geo()} +\usage{ +cincy_city_geo() +} +\description{ +cincy_city_geo() +} +\examples{ +cincy_city_geo() +} diff --git a/man/cincy_zcta_geo.Rd b/man/cincy_zcta_geo.Rd new file mode 100644 index 0000000..d0d2c1a --- /dev/null +++ b/man/cincy_zcta_geo.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cincy_geographies.R +\name{cincy_zcta_geo} +\alias{cincy_zcta_geo} +\title{Cincy ZIP Code Tabulation Areas} +\usage{ +cincy_zcta_geo(vintage = as.character(2024:2013)) +} +\arguments{ +\item{vintage}{a character vector of a year corresponding to the vintage of TIGER/Line data} +} +\value{ +a simple features object with a geographic identifier column (\code{geoid}) +and a geometry column (\code{s2_geography}) +} +\description{ +Read \href{https://www.census.gov/programs-surveys/geography/guidance/geo-areas/zctas.html}{ZIP Code Tabulation Areas (ZCTAs)} +geographies from the online Census +\href{https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html}{TIGER/Line} +files into R +} +\examples{ +cincy_zcta_geo() +cincy_zcta_geo("2018") +} diff --git a/man/codec_as_sf.Rd b/man/codec_as_sf.Rd new file mode 100644 index 0000000..7852738 --- /dev/null +++ b/man/codec_as_sf.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cincy_interpolate.R +\name{codec_as_sf} +\alias{codec_as_sf} +\title{Coerce CoDEC data package into a simple features object} +\usage{ +codec_as_sf(x) +} 
+\arguments{ +\item{x}{a CoDEC data package} +} +\value{ +a simple features object with a geographic identifier column (\code{geoid}) +and a geometry column (\code{s2_geography}) in addition to the columns in \code{x} +} +\description{ +The name of the census tract column in the CoDEC data package is used to add +the appropriate cincy geography. +} +\details{ +Tract identifiers do not change across decennial censuses, but the digital representation of their boundaries +may be improved over time. Here, data packages using 2010 tract identifiers use the TIGER/Line 2019 tract shapefiles +and data packages using 2020 tract identifiers use the TIGER/Line 2020 tract shapefiles. +} +\examples{ +codec_as_sf(get_codec_dpkg("property_code_enforcements-v0.2.0")) +} diff --git a/man/codec_dpkg_as_sf.Rd b/man/codec_dpkg_as_sf.Rd deleted file mode 100644 index 3860e3f..0000000 --- a/man/codec_dpkg_as_sf.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/codec_dpkg.R -\name{codec_dpkg_as_sf} -\alias{codec_dpkg_as_sf} -\title{Convert a CoDEC dpkg into an sf object} -\usage{ -codec_dpkg_as_sf(x) -} -\arguments{ -\item{x}{a CoDEC dpkg} -} -\value{ -an \code{sf} object that is a codec dpkg with an added \code{geometry} column -} -\description{ -The required census tract identifier column name is used to merge in tract geographies -from the \href{https://geomarker.io/cincy}{cincy} package. 
-} -\examples{ -get_codec_dpkg("drivetime-v0.2.2") |> - codec_dpkg_as_sf() -} diff --git a/man/codec_dpkg_s3_put.Rd b/man/codec_dpkg_s3_put.Rd deleted file mode 100644 index 5b04937..0000000 --- a/man/codec_dpkg_s3_put.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/codec_dpkg_s3.R -\name{codec_dpkg_s3_put} -\alias{codec_dpkg_s3_put} -\title{Put a dpkg into the public CoDEC S3 bucket} -\usage{ -codec_dpkg_s3_put(x) -} -\arguments{ -\item{x}{a data package (\code{dpkg}) object, ideally created with \code{as_codec_dpkg()} to ensure -the data meets CoDEC specifications} -} -\value{ -character string URI of uploaded resource -} -\description{ -The \href{https://aws.amazon.com/cli/}{AWS CLI} tool must be installed and authenticated to -write to \verb{s3://geomarker-io/codec_data}. -The resulting data package will be available publicly. -} -\examples{ -\dontrun{ -# use aws command line to login interactively via profile sso account" -system2("aws", c("sso", "login", "--profile", "geomarker-io")) -# make sure to set AWS_PROFILE so the AWS CLI tool knows to use it by default" -Sys.setenv("AWS_PROFILE" = "geomarker-io") -} -} diff --git a/man/codec_interpolate.Rd b/man/codec_interpolate.Rd new file mode 100644 index 0000000..a76548c --- /dev/null +++ b/man/codec_interpolate.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cincy_interpolate.R +\name{codec_interpolate} +\alias{codec_interpolate} +\title{Spatially interpolate community-level data} +\usage{ +codec_interpolate( + from, + to = c("neigh", "zcta", "bg"), + weights = c("pop", "homes", "area") +) +} +\arguments{ +\item{from}{a CoDEC data package} + +\item{to}{character string naming a target geography} + +\item{weights}{which census block-level weights to use; see details} +} +\value{ +a tibble with a new geographic identifier column for the \code{to} target geography (\code{geoid}) +in addition to the 
(interpolated) columns in \code{from} +} +\description{ +Census block-level weights are used to spatially interpolate CoDEC data packages from 2020 +census tract geographies to 2020 neighborhoods (\code{neigh}), ZIP code tabulation areas (\code{zcta}), or block groups (\code{bg}). +} +\details{ +Block-level total population (\code{pop}), total number of homes (\code{homes}), or total land area (\code{area}) +from the 2020 Census can be chosen to use for the weights. +Geospatial intersection happens after transforming geographies to epsg:5072. +See \code{codec_as_sf()} for adding geography to a CoDEC data package. +Variables beginning with "n_" are interpolated using a weighted sum; +all other variables are interpolated using a weighted mean. +} +\examples{ +codec_interpolate(from = get_codec_dpkg("acs_measures-v0.1.0")) +# TODO codec_interpolate(from = get_codec_dpkg("property_code_enforcements-v0.2.0")) +} diff --git a/man/get_codec_dpkg.Rd b/man/get_codec_dpkg.Rd index d0bb0d2..1b2f5cb 100644 --- a/man/get_codec_dpkg.Rd +++ b/man/get_codec_dpkg.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/codec_dpkg.R \name{get_codec_dpkg} \alias{get_codec_dpkg} -\title{Read a dpkg from the public CoDEC repository into R} +\title{Read a dpkg from CoDEC into R} \usage{ get_codec_dpkg(codec_dpkg, overwrite = FALSE) } diff --git a/man/install_cagis_data.Rd b/man/install_cagis_data.Rd new file mode 100644 index 0000000..194453e --- /dev/null +++ b/man/install_cagis_data.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cincy_geographies.R +\name{install_cagis_data} +\alias{install_cagis_data} +\title{Install CAGIS GIS database} +\usage{ +install_cagis_data( + cagis_data_url = + "https://www.cagis.org/Opendata/Quarterly_GIS_Data/CAGISOpenDataQ4_2024.gdb.zip" +) +} +\arguments{ +\item{cagis_data_url}{the url to the CAGIS Open Data .gdb.zip file; this changes quarterly, so 
+\href{https://www.cagis.org/Opendata/Quarterly_GIS_Data}{check} for something more recent if the file cannot be found} +} +\description{ +This installs the CAGIS Open Data GIS database (\code{.gdb}) into the data +directory for the codec package. Once downloaded, it will be reused +across R sessions on the same computer. +The geodatabase contains many +\href{https://www.cagis.org/Opendata/Quarterly_GIS_Data/OpenData_Layer_List.txt}{layers} that are +updated quarterly. (Historical geodatabases are not available here.) +} +\examples{ +options(timeout = max(2500, getOption("timeout")), download.file.method = "libcurl") +install_cagis_data() +sf::st_layers(install_cagis_data())$name +} +\seealso{ +This function is called by \code{cincy_neighborhood_geo()}, \code{cincy_city_geo()} +and others that import individual layers. +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index bed4405..5e4c7ee 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -13,7 +13,7 @@ template: code_font: {google: "Source Code Pro"} navbar: structure: - left: [about, data, harmonization, explore, reference] + left: [about, harmonization,reference, data, explore] components: about: text: About @@ -29,15 +29,21 @@ navbar: href: articles/data.html reference: - - title: Accessing CoDEC data + - title: CoDEC data contents: - get_codec_dpkg - - codec_dpkg_as_sf - - title: Creating CoDEC data - contents: + - codec_interpolate - as_codec_dpkg - - codec_dpkg_s3_put - - title: CoDEC Developer Resources + - codec_as_sf + - title: cincy geographies + contents: + - cincy_census_geo + - cincy_county_geo + - cincy_neighborhood_geo + - cincy_city_geo + - install_cagis_data + - cincy_zcta_geo + - title: CoDEC developer tools contents: - codec_colors diff --git a/tests/testthat/test-cincy_geographies.R b/tests/testthat/test-cincy_geographies.R new file mode 100644 index 0000000..d17b3a8 --- /dev/null +++ b/tests/testthat/test-cincy_geographies.R @@ -0,0 +1,89 @@ +test_that("cincy zcta 
works", { + d <- cincy_zcta_geo("2024") + expect_equal(names(d), c("geoid", "s2_geography")) + expect_s3_class(d, c("sf", "tbl_df")) + expect_equal(nrow(d), 55L) + expect_equal(round(sum(s2::s2_area(d$s2_geography))), 1083637494L) + + d <- cincy_zcta_geo("2019") + expect_equal(names(d), c("geoid", "s2_geography")) + expect_s3_class(d, c("sf", "tbl_df")) + expect_equal(nrow(d), 54L) + expect_equal(round(sum(s2::s2_area(d$s2_geography))), 1089190173L) +}) + +test_that("cincy tracts and block groups", { + d <- cincy_census_geo("tract", "2024") + expect_equal(nrow(d), 226) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) + + d <- cincy_census_geo("tract", "2019") + expect_equal(nrow(d), 222) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) + + d <- cincy_census_geo("bg", "2024") + expect_equal(nrow(d), 678) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) + + d <- cincy_census_geo("bg", "2019") + expect_equal(nrow(d), 697) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) +}) + +test_that("geography functions will error for years outside of 2013 - 2024", { + cincy_census_geo("tract", "2012") |> + expect_error("must be one of") + + cincy_county_geo("2012") |> + expect_error("must be one of") +}) + +test_that("cincy county", { + d <- cincy_county_geo("2024") + expect_equal(length(d), 1) + expect_s3_class(d, c("s2_geography", "wk_vctr")) + expect_equal(round(s2::s2_area(d)), 1067799848L) +}) + +test_that("cincy city", { + d <- cincy_city_geo() + expect_equal(length(d), 1) + expect_s3_class(d, c("s2_geography", "wk_vctr")) + expect_equal(round(s2::s2_area(d)), 206352433L) +}) + +test_that("cincy zcta", { + d <- cincy_zcta_geo("2024") + expect_equal(nrow(d), 55) + expect_s3_class(d, c("sf", "tbl_df")) + 
expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) + + d <- cincy_zcta_geo("2018") + expect_equal(nrow(d), 54) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) +}) + +test_that("cincy neighborhoods", { + d <- cincy_neighborhood_geo("statistical_neighborhood_approximations") + expect_equal(nrow(d), 50) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) + + d <- cincy_neighborhood_geo("community_council") + expect_equal(nrow(d), 75) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true(is.character(d$geoid)) +}) diff --git a/tests/testthat/test-cincy_interpolate.R b/tests/testthat/test-cincy_interpolate.R new file mode 100644 index 0000000..9493bfd --- /dev/null +++ b/tests/testthat/test-cincy_interpolate.R @@ -0,0 +1,32 @@ +test_that("codec_as_sf", { + d <- codec_as_sf(get_codec_dpkg("acs_measures-v0.1.0")) + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_true("geoid" %in% names(d)) +}) + +test_that("cincy_block_weights", { + d <- cincy_block_weights() + expect_s3_class(d, c("sf", "tbl_df")) + expect_s3_class(d$s2_geography, "sfc") + expect_equal(names(d), c("pop", "homes", "area", "s2_geography")) +}) + +test_that("codec_interpolate", { + + codec_interpolate(get_codec_dpkg("acs_measures-v0.1.0"), to = "zcta", weights = "pop") |> + expect_s3_class("tbl_df") |> + nrow() |> + expect_equal(54L) + + codec_interpolate(get_codec_dpkg("acs_measures-v0.1.0"), to = "neigh", weights = "homes") |> + expect_s3_class("tbl_df") |> + nrow() |> + expect_equal(51L) + + codec_interpolate(get_codec_dpkg("acs_measures-v0.1.0"), to = "bg", weights = "area") |> + expect_s3_class("tbl_df") |> + nrow() |> + expect_equal(678L) + +}) diff --git a/tests/testthat/test-codec_check.R b/tests/testthat/test-codec_check.R index d149217..55498b5 100644 --- 
a/tests/testthat/test-codec_check.R +++ b/tests/testthat/test-codec_check.R @@ -7,12 +7,12 @@ test_that("check census tract id", { readRDS(testthat::test_path("drivetime", "drivetime.rds")) |> dplyr::rename(census_tract_id = census_tract_id_2010) |> check_census_tract_id() |> - expect_identical("must contain one census tract id column called census_tract_id_2000, census_tract_id_2010, or census_tract_id_2020") + expect_identical("must contain one census tract id column called census_tract_id_2010 or census_tract_id_2020") readRDS(testthat::test_path("drivetime", "drivetime.rds")) |> dplyr::slice_head(n = 5) |> check_census_tract_id() |> - expect_identical("the census tract id column, census_tract_id_2010, does not contain every census tract in `cincy::tract_tigris_2010`") + expect_identical("the census tract id column, census_tract_id_2010, does not contain every census tract for that vintage; Check for missing census tract observations and check that you are using the correct vintage.") }) test_that("check date", { @@ -49,3 +49,8 @@ test_that("as_codec_dpkg works", { as_codec_dpkg(name = "foofy", version = "0.0.0") |> expect_error("does not contain") }) + +test_that("is_codec_dpkg works", { + expect_false(is_codec_dpkg(mtcars)) + expect_true(is_codec_dpkg(get_codec_dpkg("drivetime-v0.2.2"))) +})