✨ Initial commit for branch "dev_denton" (ndi v0.1.6.9010) (#27)

* Added `denton()` function to compute the aspatial racial or ethnic Relative Clustering (*RCL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281)
idblr · Aug 30, 2024 · 7ee23cf · 7ee23cf
1 parent 5ea6a28
commit 7ee23cf
Show file tree

Hide file tree

Showing 16 changed files with 1,330 additions and 427 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ndi
 Title: Neighborhood Deprivation Indices
-Version: 0.1.6.9009
+Version: 0.1.6.9010
 Date: 2024-08-30
 Authors@R:
     c(person(given = "Ian D.",

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ export(atkinson)
 export(bell)
 export(bemanian_beyer)
 export(bravo)
+export(denton)
 export(duncan)
 export(duncan_cuzzort)
 export(duncan_duncan)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,6 @@
 # ndi (development version)
 
-## ndi v0.1.6.9009
+## ndi v0.1.6.9010
 
 ### New Features
 
@@ -12,11 +12,12 @@
 * Added `theil()` function the aspatial racial or ethnic Entropy (*H*) based on Theil (1972; ISBN:978-0-444-10378-9) and [Theil & Finizza (1971)](https://doi.org/110.1080/0022250X.1971.9989795)
 * Added `white_blau()` function to compute an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0)
 * Thank you for the feature suggestions above, [Symielle Gaston](https://orcid.org/0000-0001-9495-1592)
+* Added `denton()` function to compute the aspatial racial or ethnic Relative Clustering (*RCL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281)
 * Added `duncan_duncan()` function to compute the aspatial racial or ethnic Relative Centralization (*RCE*) based on [Duncan & Duncan (1955b)](https://doi.org/10.1086/221609) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281)
 * Added `massey()` function to compute the aspatial racial or ethnic Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281)
 
 #### New Function Capabilities
-* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions.
+* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions.
 * Added census block group computation for `anthopolos()` by specifying `geo == 'cbg'` or `geo == 'block group'`
 * Added `holder` argument to `atkinson()` function to toggle the computation with or without the Hölder mean. The function can now compute *A* without the Hölder mean. The default is `holder = FALSE`.
 * Added `crs` argument to `anthopolos()`, `bravo()`, and `white_blau()` functions to provide spatial projection of the distance-based metrics

diff --git a/R/denton.R b/R/denton.R
diff --git a/R/globals.R b/R/globals.R
@@ -270,6 +270,7 @@ globalVariables(
     'd',
     'crs',
     'RCE',
-    'ACL'
+    'ACL',
+    'RCL'
   )
 )
diff --git a/R/ndi-package.R b/R/ndi-package.R
@@ -22,6 +22,8 @@
 #' 
 #' \code{\link{bemanian_beyer}} Computes the aspatial Local Exposure and Isolation (\emph{LEx/Is}) based on Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}.
 #' 
+#' \code{\link{denton}} Computes the aspatial Relative Clustering (\emph{RCL}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281}.
+#'
 #' \code{\link{duncan}} Computes the aspatial Dissimilarity Index (\emph{D}) based on Duncan & Duncan (1955a) \doi{10.2307/2088328}.
 #' 
 #' \code{\link{duncan_cuzzort}} Computes the aspatial Absolute Centralization (\emph{ACE}) based on Duncan, Cuzzort, & Duncan (1961; LC:60007089) and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}.

diff --git a/R/utils.R b/R/utils.R
@@ -8,9 +8,9 @@ ddd_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    n_i <- sum(xx$subgroup, na.rm = TRUE)
+    n_i <- sum(x_i, na.rm = TRUE)
     y_i <- xx$subgroup_ref
-    m_i <- sum(xx$subgroup_ref, na.rm = TRUE)
+    m_i <- sum(y_i, na.rm = TRUE)
     D <- 0.5 * sum(abs((x_i/n_i) - (y_i/m_i)), na.rm = TRUE)
     return(D)
   }
@@ -38,9 +38,9 @@ a_fun <- function(x, epsilon, omit_NAs, holder) {
       }
     } else {
       x_i <- xx$subgroup
-      X <- sum(xx$subgroup, na.rm = TRUE)
+      X <- sum(x_i, na.rm = TRUE)
       t_i <- xx$TotalPopE
-      N <- sum(xx$TotalPopE, na.rm = TRUE)
+      N <- sum(t_i, na.rm = TRUE)
       p_i <- x_i / t_i
       P <- X / N
       b <- epsilon
@@ -60,7 +60,7 @@ xpy_star_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
+    X <- sum(x_i, na.rm = TRUE)
     y_i <- xx$subgroup_ixn
     t_i <- xx$TotalPopE
     xPy_star <- sum((x_i / X) * (y_i / t_i), na.rm = TRUE)
@@ -78,7 +78,7 @@ xpx_star_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
+    X <- sum(x_i, na.rm = TRUE)
     t_i <- xx$TotalPopE
     xPx_star <- sum((x_i / X) * (x_i / t_i), na.rm = TRUE)
     return(xPx_star)
@@ -95,9 +95,9 @@ v_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
+    X <- sum(x_i, na.rm = TRUE)
     t_i <- xx$TotalPopE
-    N <- sum(xx$TotalPopE, na.rm = TRUE)
+    N <- sum(t_i, na.rm = TRUE)
     xPx_star <- sum((x_i / X) * (x_i / t_i), na.rm = TRUE)
     P <- X / N
     V <- (xPx_star - P) / (1 - P)
@@ -117,8 +117,8 @@ lq_fun <- function(x, omit_NAs) {
     x_i <- xx$subgroup # x_im
     t_i <- xx$TotalPopE # X_i
     p_i <- x_i / t_i # p_im
-    X <- sum(xx$subgroup, na.rm = TRUE) # X_m
-    N <- sum(xx$TotalPopE, na.rm = TRUE) # X
+    X <- sum(x_i, na.rm = TRUE) # X_m
+    N <- sum(t_i, na.rm = TRUE) # X
     if (anyNA(p_i)) { p_i[is.na(p_i)] <- 0 }
     LQ <- p_i / (X / N) # (x_im/X_i)/(X_m/X)
     df <-  data.frame(LQ = LQ, GEOID = xx$GEOID)
@@ -139,8 +139,14 @@ lexis_fun <- function(x, omit_NAs) {
     if (anyNA(p_im)) { p_im[is.na(p_im)] <- 0 }
     p_in <- xx$subgroup_ixn / xx$TotalPopE
     if (anyNA(p_in)) { p_in[is.na(p_in) ] <- 0 }
-    P_m <- sum(xx$subgroup, na.rm = TRUE) / sum(xx$TotalPopE, na.rm = TRUE)
-    P_n <- sum(xx$subgroup_ixn, na.rm = TRUE) / sum(xx$TotalPopE, na.rm = TRUE)
+    x_i <- xx$subgroup
+    X <- sum(x_i, na.rm = TRUE)
+    y_i <- xx$subgroup_ixn
+    Y <- sum(y_i, na.rm = TRUE)
+    t_i <- xx$TotalPopE
+    N <- sum(t_i, na.rm = TRUE)
+    P_m <- X / N
+    P_n <- Y / N
     LExIs <- car::logit(p_im * p_in) - car::logit(P_m * P_n)
     df <-  data.frame(LExIs = LExIs, GEOID = xx$GEOID)
     return(df)
@@ -157,9 +163,9 @@ del_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
+    X <- sum(x_i, na.rm = TRUE)
     a_i <- xx$ALAND
-    A <- sum(xx$ALAND, na.rm = TRUE)
+    A <- sum(a_i, na.rm = TRUE)
     DEL <- 0.5 * sum(abs((x_i / X) - (a_i / A)), na.rm = TRUE)
     return(DEL)
   }
@@ -181,12 +187,15 @@ sp_fun <- function(x, crs, omit_NAs) {
       units::set_units(value = km) %>%
       units::drop_units() %>%
       exp()
-    X <- sum(xx$subgroup, na.rm = TRUE)
-    Y <- sum(xx$subgroup_ref, na.rm = TRUE)
-    N <- sum(xx$TotalPopE, na.rm = TRUE)
-    P_xx <- sum((xx$subgroup * xx$subgroup * c_ij) / X^2, na.rm = TRUE)
-    P_xy <- sum((xx$subgroup * xx$subgroup_ref * c_ij) / (X * Y), na.rm = TRUE)
-    P_tt <- sum((xx$TotalPopE * xx$TotalPopE * c_ij) / N^2, na.rm = TRUE)
+    x_i <- xx$subgroup
+    X <- sum(x_i, na.rm = TRUE)
+    y_i <- xx$subgroup_ref
+    Y <- sum(y_i, na.rm = TRUE)
+    t_i <- xx$TotalPopE
+    N <- sum(t_i, na.rm = TRUE)
+    P_xx <- sum((x_i * x_i * c_ij) / X^2, na.rm = TRUE)
+    P_xy <- sum((x_i * y_i * c_ij) / (X * Y), na.rm = TRUE)
+    P_tt <- sum((t_i * t_i * c_ij) / N^2, na.rm = TRUE)
     SP <- ((X * P_xx) + (Y * P_xy)) / (N * P_tt)
     return(SP)
   }
@@ -202,9 +211,9 @@ g_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
+    X <- sum(x_i, na.rm = TRUE)
     t_i <- xx$TotalPopE
-    N <- sum(xx$TotalPopE, na.rm = TRUE)
+    N <- sum(t_i, na.rm = TRUE)
     p_i <- x_i / t_i
     P <- X / N
     titj <- apply(expand.grid(t_i, t_i), MARGIN = 1, FUN = prod)
@@ -225,9 +234,9 @@ djt_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
+    X <- sum(x_i, na.rm = TRUE)
     t_i <- xx$TotalPopE
-    N <- sum(xx$TotalPopE, na.rm = TRUE)
+    N <- sum(t_i, na.rm = TRUE)
     p_i <- x_i / t_i
     P <- X / N
     D <- sum(t_i * abs(p_i - P), na.rm = TRUE) / (2 * N * P * (1 - P))
@@ -249,9 +258,9 @@ h_fun <- function(x, omit_NAs) {
     NA
   } else {
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
+    X <- sum(x_i, na.rm = TRUE)
     t_i <- xx$TotalPopE
-    N <- sum(xx$TotalPopE, na.rm = TRUE)
+    N <- sum(t_i, na.rm = TRUE)
     p_i <- x_i / t_i
     p_i[is.infinite(p_i)] <- 0
     P <- X / N
@@ -291,8 +300,9 @@ ace_fun <- function(x, lgeom, crs, omit_NAs) {
       sf::st_drop_geometry()
     x_i <- xx$subgroup
     x_n <- sum(x_i, na.rm = TRUE)
-    X_i <- cumsum(x_i / x_n) 
-    A_i <- cumsum(xx$ALAND / A$ALAND) 
+    X_i <- cumsum(x_i / x_n)
+    a_i <- xx$ALAND
+    A_i <- cumsum(a_i / A$ALAND) 
     I_i <- matrix(c(seq(1, (length(x_i)-1), 1), seq(2, length(x_i), 1)), ncol = 2)
     Xi_1Ai <- sum(X_i[I_i[, 1]] * A_i[I_i[, 2]], na.rm = TRUE)
     XiA1_1 <- sum(X_i[I_i[, 2]] * A_i[I_i[, 1]], na.rm = TRUE)
@@ -351,12 +361,39 @@ acl_fun <- function(x, crs, omit_NAs) {
       units::drop_units() %>%
       exp()
     x_i <- xx$subgroup
-    X <- sum(xx$subgroup, na.rm = TRUE)
-    n <- length(xx$subgroup)
+    X <- sum(x_i, na.rm = TRUE)
+    n <- length(x_i)
     t_i <- xx$TotalPopE
     num <- (sum(x_i / X, na.rm = TRUE) * sum(c_ij * x_i, na.rm = TRUE)) - ((X / n^2) * sum(c_ij, na.rm = TRUE))
     denom <- (sum(x_i / X, na.rm = TRUE) * sum(c_ij * t_i, na.rm = TRUE)) - ((X / n^2) * sum(c_ij, na.rm = TRUE))
     ACL <- num / denom
     return(ACL)
   }
 }
+
+# Internal function for Relative Clustering
+## From Denton & Massey (1988) https://doi.org/10.1093/sf/67.2.281
+## Returns NA value if only one smaller geography in a larger geography
+rcl_fun <- function(x, crs, omit_NAs) {
+  xx <- x[ , c('subgroup', 'subgroup_ref', 'ALAND')]
+  if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(sf::st_drop_geometry(xx)), ] }
+  if (nrow(sf::st_drop_geometry(x)) < 2 || any(sf::st_drop_geometry(xx) < 0) || any(is.na(sf::st_drop_geometry(xx)))) {
+    NA
+  } else {
+    xx <- xx %>% sf::st_transform(crs = crs)
+    d_ij <- suppressWarnings(sf::st_distance(sf::st_centroid(xx), sf::st_centroid(xx)))
+    diag(d_ij) <- sqrt(0.6 * xx$ALAND)
+    c_ij <- -d_ij %>% 
+      units::set_units(value = km) %>%
+      units::drop_units() %>%
+      exp()
+    x_i <- xx$subgroup
+    X <- sum(x_i, na.rm = TRUE)
+    y_i <- xx$subgroup_ref
+    Y <- sum(y_i, na.rm = TRUE)
+    P_xx <- sum((x_i * x_i * c_ij) / X^2, na.rm = TRUE)
+    P_yy <- sum((y_i * y_i * c_ij) / Y^2, na.rm = TRUE)
+    RCL <- (P_xx / P_yy) - 1
+    return(RCL)
+  }
+}
diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@
 [![DOI](https://zenodo.org/badge/521439746.svg)](https://zenodo.org/badge/latestdoi/521439746)
 <!-- badges: end -->
 
-**Date repository last updated**: 2024-08-29
+**Date repository last updated**: 2024-08-30
 
 ### Overview
 
@@ -63,6 +63,10 @@ To install the development version from GitHub:
 <td>Compute the spatial Educational Isolation Index (<i>EI</i>) based on <a href='https://doi.org/10.3390/ijerph18179384'>Bravo et al. (2021)</a></td>
 </tr>
 <tr>
+<td><a href='/R/denton.R'><code>denton</code></a></td>
+<td>Compute the aspatial racial or ethnic Relative Clustering (<i>RCL</i>) based on <a href='https://doi.org/10.1093/sf/67.2.281'>Massey & Denton (1988)</a></td>
+</tr>
+<tr>
 <td><a href='/R/duncan.R'><code>duncan</code></a></td>
 <td>Compute the aspatial racial or ethnic Dissimilarity Index (<i>D</i>) based on <a href='https://doi.org/10.2307/2088328'>Duncan & Duncan (1955a)</a></td>
 </tr>
@@ -286,6 +290,7 @@ ggplot() +
     subtitle = 'Washington, D.C. tracts as the referent'
   )
 ```
+
 ![](man/figures/messer1.png)
 ![](man/figures/messer2.png)
 
@@ -323,7 +328,7 @@ powell_wiley_2020_DC$missing
 # Obtain the 2020 census tracts from the 'tigris' package
 tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE)
 
-# Join the NDI (powell_wiley) values to the census tract geometry
+# Join the NDI (Powell-Wiley) values to the census tract geometry
 DC_2020_powell_wiley <- tract_2020_DC %>%
   left_join(powell_wiley_2020_DC$ndi, by = 'GEOID')
 DC_2020_powell_wiley <- DC_2020_powell_wiley %>%
@@ -460,6 +465,8 @@ cor(NDI_2020_DC$NDI.messer, NDI_2020_DC$NDI.powell_wiley, use = 'complete.obs')
 table(NDI_2020_DC$NDIQuart, NDI_2020_DC$NDIQuint)
 ```
 
+#### Additional indices of racial or ethnic residential segregation or socioeconomic disparity
+
 ``` r
 # ---------------------------------------------------- #
 # Compute spatial Racial Isoliation Index (Anthopolos) #
@@ -726,6 +733,58 @@ ggplot() +
 
 ![](man/figures/ei.png)
 
+```r
+# ------------------------------------------------------ #
+# Compute aspatial Relative Clustering (Massey & Denton) #
+# ------------------------------------------------------ #
+
+# Relative Clustering based on Massey & Denton (1988)
+## Selected subgroup: Not Hispanic or Latino, Black or African American alone
+## Selected subgroup reference: Not Hispanic or Latino, white alone
+## Selected large geography: census tract
+## Selected small geography: census block group
+RCL_2020_DC <- denton(
+  geo_large = 'tract',
+  geo_small = 'cbg',
+  state = 'DC',
+  year = 2020,
+  subgroup = 'NHoLB',
+  subgroup_ref = 'NHoLW'
+)
+
+# Obtain the 2020 census tracts from the 'tigris' package
+tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE)
+
+# Join the RCL (Massey & Denton) values to the census tract geometry
+RCL_2020_DC <- tract_2020_DC %>%
+  left_join(RCL_2020_DC$rcl, by = 'GEOID')
+
+ggplot() +
+  geom_sf(
+    data = RCL_2020_DC,
+    aes(fill = RCL),
+    color = 'white'
+  ) +
+  theme_bw() +
+  scale_fill_gradient2(
+    low = '#998ec3', 
+    mid = '#f7f7f7', 
+    high = '#f1a340', 
+    midpoint = 0
+  )  +
+  labs(
+    fill = 'Index (Continuous)',
+    caption = 'Source: U.S. Census ACS 2016-2020 estimates'
+  ) +
+  ggtitle(
+    'Relative Clustering (Massey & Denton)\n
+    Washington, D.C. census block groups to tracts',
+    subtitle = 'Black non-Hispanic vs. white non-Hispanic'
+  )
+```
+
+![](man/figures/rcl.png)
+
 ```r
 # ----------------------------------------------------------------------- #
 # Compute aspatial racial or ethnic Dissimilarity Index (Duncan & Duncan) #
@@ -846,7 +905,7 @@ RCE_2020_DC <- duncan_duncan(
 # Obtain the 2020 census tracts from the 'tigris' package
 tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE)
 
-# Join the ACE (Duncan & Cuzzort) values to the census tract geometry
+# Join the ACE (Duncan & Duncan) values to the census tract geometry
 RCE_2020_DC <- tract_2020_DC %>%
   left_join(RCE_2020_DC$rce, by = 'GEOID')
 
@@ -1278,7 +1337,7 @@ ACL_2020_DC <- massey(
 # Obtain the 2020 census tracts from the 'tigris' package
 tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE)
 
-# Join the ACL (Duncan & Cuzzort) values to the census tract geometry
+# Join the ACL (Massey & Denton) values to the census tract geometry
 ACL_2020_DC <- tract_2020_DC %>%
   left_join(ACL_2020_DC$acl, by = 'GEOID')
-Original file line number
+Diff line change
@@ Expand Up / @@ -270,6 +270,7 @@ globalVariables( @@
         'd',
         'crs',
         'RCE',
-        'ACL'
+        'ACL',
+        'RCL'
       )
     )