geomarker-io · cole-brokamp · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024 · Nov 8, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -20,23 +20,21 @@ Suggests:
     roxygen2,
     rmarkdown,
     dplyr,
-    curl,
-    glue,
     tibble,
-    sf
+    cincy
 Remotes:
-    geomarker-io/cincy,
-    cole-brokamp/dpkg
+    geomarker-io/cincy
 Config/testthat/edition: 3
 URL: https://github.com/geomarker-io/codec,
     http://geomarker.io/codec/
 BugReports: https://github.com/geomarker-io/codec/issues
 Imports: 
     rlang,
     stringr,
+    glue,
     purrr (>= 1.0.0),
-    cincy (>= 1.1.0),
-    dpkg (>= 0.5.1)
+    dpkg,
+    sf
 Depends: 
     R (>= 2.10)
 LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,8 +1,12 @@
 # Generated by roxygen2: do not edit by hand
 
 export(as_codec_dpkg)
+export(cincy_census_geo)
+export(cincy_city_geo)
+export(cincy_county_geo)
+export(cincy_neighborhood_geo)
+export(cincy_zcta_geo)
 export(codec_colors)
-export(codec_dpkg_as_sf)
-export(codec_dpkg_s3_put)
 export(get_codec_dpkg)
+export(is_codec_dpkg)
 import(cincy)
diff --git a/R/cincy_geographies.R b/R/cincy_geographies.R
@@ -0,0 +1,195 @@
+#' Cincy census tracts and block groups
+#'
+#' Read tract and block group ("bg") geographies from the online Census
+#' [TIGER/Line](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
+#' files into R
+#' @param geography which type of cincy census geography to return
+#' @param vintage a character vector of a year corresponding to the vintage of TIGER/Line data
+#' @details
+#' Compressed shapefiles are downloaded from TIGER into an R user data directory and will be cached
+#' for use across other R sessions (see `?dpkg::stow` for more details).
+#' @returns a simple features object with a geographic identifier column (`geoid`)
+#' and a geometry column (`s2_geography`)
+#' @export
+#' @examples
+#' cincy_census_geo("tract", "2024")
+#' cincy_census_geo("tract", "2020")
+#' cincy_census_geo("tract", "2019")
+#' cincy_census_geo("bg", "2020")
+#' cincy_census_geo("bg", "2019")
+cincy_census_geo <- function(geography = c("tract", "bg"), vintage = as.character(2024:2013)) {
+  geography <- rlang::arg_match(geography)
+  vintage <- rlang::arg_match(vintage)
+  tiger_url <- glue::glue(
+    "https://www2.census.gov/geo/tiger/TIGER{vintage}",
+    "/{toupper(geography)}/tl_{vintage}_39_{geography}.zip"
+  )
+  tiger_local <- dpkg::stow_url(tiger_url)
+  out <-
+    sf::read_sf(glue::glue("/vsizip/", tiger_local),
+      query = glue::glue("SELECT GEOID FROM tl_{vintage}_39_{geography} WHERE COUNTYFP = '061'")
+    )
+  names(out) <- tolower(names(out))
+  out$s2_geography <- sf::st_as_s2(out$geometry)
+  out <- sf::st_drop_geometry(out)
+  out <- sf::st_as_sf(out)
+  return(out)
+}
+
+#' Cincy county
+#' @rdname cincy_census_geo
+#' @export
+#' @examples
+#' cincy_county_geo("2024")
+cincy_county_geo <- function(vintage = as.character(2024:2013)) {
+  vintage <- rlang::arg_match(vintage)
+  tiger_url <- glue::glue("https://www2.census.gov/geo/tiger/TIGER{vintage}/COUNTY/tl_{vintage}_us_county.zip")
+  tiger_local <- dpkg::stow_url(tiger_url)
+  out <-
+    sf::read_sf(glue::glue("/vsizip/", tiger_local),
+      query = glue::glue("SELECT GEOID FROM tl_{vintage}_us_county WHERE GEOID = '39061'")
+    )
+  return(sf::st_as_s2(out$geometry))
+}
+
+#' Install CAGIS GIS database
+#' 
+#' This installs the CAGIS Open Data GIS database (`.gdb`) into the data
+#' directory for the codec package. Once downloaded, it will be reused
+#' across R sessions on the same computer.
+#' The geodatabase contains many [layers](https://www.cagis.org/Opendata/Quarterly_GIS_Data/OpenData_Layer_List.txt) that are
+#' updated quarterly. (Historical geodatabases are not available here.)
+#' @seealso This function is called by `cincy_neighborhood_geo()`, `cincy_city_geo()` and others that import individual layers.
+#' @param cagis_data_url the url to the CAGIS Open Data .gdb.zip file; this changes quarterly, so
+#' [check](https://www.cagis.org/Opendata/Quarterly_GIS_Data) for something more recent if the file cannot be found
+#' @examples
+#' options(timeout = max(2500, getOption("timeout")), download.file.method = "libcurl")
+#' install_cagis_data()
+#' sf::st_layers(install_cagis_data())$name
+install_cagis_data <- function(cagis_data_url = "https://www.cagis.org/Opendata/Quarterly_GIS_Data/CAGISOpenDataQ4_2024.gdb.zip") {
+  cagis_gdb_name <- tools::file_path_sans_ext(basename(cagis_data_url))
+  dest <- file.path(tools::R_user_dir(package = "codec", "data"), cagis_gdb_name)
+  if (file.exists(dest)) {
+    return(dest)
+  }
+  tmp <- tempfile(fileext = ".zip")
+  utils::download.file(cagis_data_url, destfile = tmp, mode = "wb")
+  unzip(tmp, exdir = dirname(dest))
+  return(dest)
+}
+
+#' Cincy neighborhood geographies
+#'
+#' CAGIS data (see `install_cagis_data()`) provides community council boundaries, but these boundaries can
+#' overlap and do not align with census geographies or ZIP codes.
+#' By default, the statistical neighborhood approximations are instead returned,
+#' which are calculated by aggregating census tracts into 50 matching neighborhoods.
+#' @param geography which type of cincy neighborhood geography to return
+#' @returns a simple features object with a geographic identifier column (`geoid`)
+#' and a geometry column (`s2_geography`)
+#' @export
+#' @examples
+#' cincy_neighborhood_geo("statistical_neighborhood_approximations")
+#' cincy_neighborhood_geo("community_council")
+cincy_neighborhood_geo <- function(geography = c("statistical_neighborhood_approximations", "community_council")) {
+  geography <- rlang::arg_match(geography)
+  if (geography == "statistical_neighborhood_approximations") {
+    noi <- c("Cincinnati_Statistical_Neighborhood_Approximations" = "SNA_NAME")
+  }
+  if (geography == "community_council") {
+    noi <- c("Cincinnati_Community_Council_Neighborhoods" = "NEIGH")
+  }
+  d <- sf::st_read(install_cagis_data(), names(noi), quiet = TRUE)
+  out <- tibble::tibble(
+    geoid = sf::st_drop_geometry(d)[, noi],
+    s2_geography = sf::st_as_s2(sf::st_cast(sf::st_zm(d$SHAPE), "MULTIPOLYGON"))
+  ) |>
+    sf::st_as_sf()
+  return(out)
+}
+
+#' cincy_city_geo()
+#' @export
+#' @rdname cincy_neighorhood_geo
+cincy_city_geo <- function() {
+  cagis_db <- install_cagis_data()
+  out <- sf::st_read(cagis_db, layer = "Cincinnati_City_Boundary", quiet = TRUE)
+  return(sf::st_as_s2(out$SHAPE))
+}
+
+
+#' Cincy census tracts and block groups
+#'
+#' Read tract and block group ("bg") geographies from the online Census
+#' [TIGER/Line](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
+#' files into R
+#' @param geography which type of cincy census geography to return
+#' @param vintage a character vector of a year corresponding to the vintage of TIGER/Line data
+#' @details
+#' Compressed shapefiles are downloaded from TIGER into an R user data directory and will be cached
+#' for use across other R sessions (see `?dpkg::stow` for more details).
+#' @returns a simple features object with a geographic identifier column (`geoid`)
+#' and a geometry column (`s2_geography`)
+#' @export
+#' @examples
+#' cincy_census_geo("tract", "2024")
+#' cincy_census_geo("tract", "2020")
+#' cincy_census_geo("tract", "2019")
+#' cincy_census_geo("bg", "2020")
+#' cincy_census_geo("bg", "2019")
+
+#' Cincy ZIP Code Tabulation Areas
+#'
+#' Read [ZIP Code Tabulation Areas (ZCTAs)](https://www.census.gov/programs-surveys/geography/guidance/geo-areas/zctas.html)
+#' geographies from the online Census
+#' [TIGER/Line](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
+#' files into R
+#' @param vintage a character vector of a year corresponding to the vintage of TIGER/Line data
+#' @export
+#' @returns a simple features object with a geographic identifier column (`geoid`)
+#' and a geometry column (`s2_geography`)
+#' @examples
+#' cincy_zcta_geo()
+#' cincy_zcta_geo("2018")
+cincy_zcta_geo <- function(vintage = as.character(2024:2013)) {
+  vintage <- rlang::arg_match(vintage)
+  is_vintage_old <- vintage %in% as.character(2013:2019)
+  tiger_url <- glue::glue(
+    "https://www2.census.gov/geo/tiger/TIGER{vintage}/",
+    ifelse(is_vintage_old, "ZCTA5", "ZCTA520"),
+    "/tl_{vintage}_us_zcta",
+    ifelse(is_vintage_old, "510", "520"),
+    ".zip"
+  )
+  tiger_local <- dpkg::stow_url(tiger_url)
+  out <-
+    sf::read_sf(glue::glue("/vsizip/", tiger_local),
+      query = glue::glue(
+        "SELECT ",
+        ifelse(is_vintage_old, "GEOID10", "GEOID20"),
+        " FROM tl_{vintage}_us_zcta",
+        ifelse(is_vintage_old, "510", "520"),
+        " WHERE ",
+        ifelse(is_vintage_old, "GEOID10", "GEOID20"),
+        " IN ({paste(paste0(\"'\", cincy_zip_codes, \"'\"), collapse = \", \")})"
+      )
+    )
+  names(out) <- gsub("[0-9]", "", tolower(names(out)))
+  out$s2_geography <- sf::st_as_s2(out$geometry)
+  out <- sf::st_drop_geometry(out)
+  out <- sf::st_as_sf(out)
+  return(out)
+}
+
+# from cincy::zcta_tiger_2020 (version 1.1.0) on 2024-11-08
+cincy_zip_codes <-
+  c(
+    "45214", "45208", "45236", "45247", "45225", "45205", "45220",
+    "45206", "45223", "45232", "45174", "45207", "45209", "45212",
+    "45213", "45217", "45218", "45229", "45238", "45242", "45051",
+    "45002", "45227", "45211", "45215", "45216", "45219", "45224",
+    "45033", "45237", "45239", "45248", "45041", "45267", "45030",
+    "45252", "45244", "45202", "45249", "45255", "45226", "45203",
+    "45246", "45111", "45147", "45052", "45240", "45241", "45243",
+    "45251", "45001", "45204", "45231", "45230", "45233"
+  )
diff --git a/R/cincy_interpolate.R b/R/cincy_interpolate.R
@@ -0,0 +1,98 @@
+#' Spatially interpolate community-level data
+#'
+#' Weights at the census block-level are used to spatially interpolate different geographies.
+#' Block-level total population, total number of homes, or total land area from the 2020 Census
+#' can be chosen to use for the weights.
+#' @param from a CoDEC data package
+#' @param to ; if NULL, tract geographies are returned without data interpolation
+#' @details Tract identifers do not change across decennial censuses, but the digital representation of their boundaries
+#' may be improved over time.  Here, data packages using 2010 tract identifers use the TIGER/Line 2019 tract shapefiles
+#' and data packages using 2020 tract identifiers use the TIGER/Line 2020 tract shapefiles
+#' @returns a simple features object with a geographic identifier column (`geoid`)
+#' and a geometry column (`s2_geography`) in addition to the (interpolated) columns in `from`
+#' @examples
+#' codec_interpolate(from = get_codec_dpkg("property_code_enforcements-v0.2.0"))
+#' codec_interpolate(get_codec_dpkg("property_code_enforcements-v0.2.0"), to = "zcta")
+codec_interpolate <- function(from, to = NULL, weights = c("pop", "homes", "area")) {
+  weights <- rlang::arg_match(weights)
+  if (!is_codec_dpkg(from)) rlang::abort("from must be a CoDEC data package")
+  md <- dpkg::dpkg_meta(from)
+  gd_vintage <- ifelse(any(grepl("census_tract_id_2010", names(from), fixed = TRUE)), "2019", "2020")
+  gd <-
+    cincy_census_geo("tract", gd_vintage) |>
+    dplyr::left_join(from, by = c("geoid" = paste0("census_tract_id_", ifelse(gd_vintage == "2019", "2010", "2020"))))
+  if (is.null(to)) {
+    return(gd)
+  }
+  return("interpolation is under construction.....")
+}
+
+cincy_block_weights <- function() {
+  tiger_url <- "https://www2.census.gov/geo/tiger/TIGER2020/TABBLOCK20/tl_2020_39_tabblock20.zip"
+  tiger_local <- dpkg::stow_url(tiger_url)
+  rd <-
+    sf::read_sf(glue::glue("/vsizip/", tiger_local),
+      query = glue::glue("SELECT GEOID20,ALAND20,HOUSING20,POP20 FROM tl_2020_39_tabblock20 WHERE COUNTYFP20 = '061'")
+    )
+  out <-
+    rd |>
+    sf::st_transform(5072) |>
+    sf::st_point_on_surface() |>
+    suppressWarnings() |>
+    dplyr::select(pop = POP20, homes = HOUSING20, area = ALAND20)
+  out$s2_geography <- sf::st_as_s2(out$geometry)
+  out <- sf::st_drop_geometry(out)
+  out <- sf::st_as_sf(out)
+  return(out)
+}
+
+from <-
+  cincy_census_geo("tract", "2020") |>
+  sf::st_transform(5072) |>
+  dplyr::mutate(n = 0.1)
+to <- sf::st_transform(cincy_zcta_geo("2020"), 5072)
+
+bw <-
+  cincy_block_weights() |>
+  sf::st_transform(5072) |>
+  dplyr::select(weight = pop, s2_geography)
+
+# from 'total weights'
+fromm <-
+  sf::st_join(from, bw, left = FALSE) |>
+  sf::st_drop_geometry() |>
+  dplyr::summarize(total_weight = sum(weight, na.rm = TRUE), .by = "geoid") |>
+  dplyr::right_join(from, by = "geoid") |>
+  sf::st_as_sf()
+
+# calculate intersections and intersection proportions
+intersections <-
+  fromm |>
+  sf::st_intersection(to) |>
+  dplyr::filter(sf::st_is(s2_geography, c("POLYGON", "MULTIPOLYGON", "GEOMETRYCOLLECTION"))) |>
+  dplyr::mutate(intersection_id = dplyr::row_number())
+
+weights <-
+  sf::st_join(bw, intersections) |>
+  sf::st_drop_geometry() |>
+  dplyr::summarize(
+    weight = sum(weight, na.rm = TRUE),
+    weight_coef = weight / total_weight,
+    .by = "intersection_id"
+  ) |>
+  dplyr::distinct(weight_coef, .keep_all = TRUE) |>
+  dplyr::right_join(intersections, by = "intersection_id") |>
+  dplyr::select(geoid, geoid.1, weight, weight_coef)
+
+# non-extensive (same total)
+fromm |>
+  sf::st_drop_geometry() |>
+  dplyr::left_join(weights, by = "geoid") |>
+  dplyr::summarize(n_interpolated = sum(n * weight_coef, na.rm = TRUE), .by = "geoid.1")
+
+# extensive (same average)
+fromm |>
+  sf::st_drop_geometry() |>
+  dplyr::left_join(weights, by = "geoid") |>
+  dplyr::summarize(n_interpolated = weighted.mean(n, weight, na.rm = TRUE), .by = "geoid.1")
+