rfordatascience · jonthegeek · Jan 19, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
diff --git a/data/curated/water-insecurity/cleaning.R b/data/curated/water-insecurity/cleaning.R
@@ -0,0 +1,73 @@
+# Clean data compiled from code referenced in article (https://waterdata.usgs.gov/blog/acs-maps/). 
+# Code was revised to pull data for all US counties for years 2022 - 2023.
+
+# Load packages -----
+library(tidycensus)
+library(sf) 
+library(janitor) 
+library(tidyverse)
+
+# Helper functions -----
+# Download ACS estimates with geometry for one survey year.
+#
+# Column names are cleaned with clean_names(), the geometry is reprojected
+# with st_transform(), and the requested year is recorded in a `year` column.
+#
+# Arguments:
+#   geography   Geography level passed to get_acs() (e.g. "county").
+#   var_names   ACS variable IDs to request.
+#   year        Survey year passed to get_acs() and stored in the result.
+#   proj        Target CRS handed to st_transform() (e.g. "EPSG:5070").
+#   survey_var  ACS product passed to get_acs(survey = ), e.g. "acs1".
+# Returns the reprojected get_acs() result with the added `year` column.
+get_census_data <- function(geography, var_names, year, proj, survey_var) {
+  get_acs(
+    geography = geography,
+    # Spell out `variables`; the abbreviated `variable` only works through
+    # R's partial argument matching.
+    variables = var_names,
+    year = year,
+    geometry = TRUE,
+    survey = survey_var
+  ) |>
+    clean_names() |>
+    st_transform(proj) |>
+    mutate(year = year)
+}
+
+# Build the county-level water-insecurity tables ----
+# ACS variables used: B01003_001 = total population,
+# B25049_004 = households lacking plumbing.
+vars <- c("B01003_001", "B25049_004")
+
+# Pull one ACS 1-year release for all US counties and widen it so the two
+# requested variables become the columns `total_pop` and `plumbing`.
+# Both years go through this single helper so their cleaning steps cannot
+# drift apart.
+#
+# Arguments:
+#   year  Survey year to request, as a number (e.g. 2023). It is also stored
+#         in the `year` column, so passing a number keeps that column numeric.
+# Returns the widened table with geoid, name, year, geometry, total_pop,
+# plumbing and percent_lacking_plumbing.
+build_water_insecurity <- function(year) {
+  get_census_data(
+    geography = "county",
+    var_names = vars,
+    year = year,
+    proj = "EPSG:5070",
+    survey_var = "acs1"
+  ) |>
+    mutate(
+      # Replace the ACS variable IDs with readable names; these become the
+      # column names after pivot_wider().
+      variable_long = case_when(
+        variable == "B01003_001" ~ "total_pop",
+        variable == "B25049_004" ~ "plumbing",
+        .default = NA_character_
+      )
+    ) |>
+    # Keep only the identifier columns plus the estimate before widening.
+    select(geoid, name, variable_long, estimate, geometry, year) |>
+    pivot_wider(
+      names_from = variable_long,
+      values_from = estimate
+    ) |>
+    mutate(
+      # NOTE(review): `plumbing` is described above as a household count while
+      # `total_pop` counts people, so this is households lacking plumbing per
+      # 100 residents rather than a share of households -- confirm intent.
+      percent_lacking_plumbing = (plumbing / total_pop) * 100
+    )
+}
+
+# Pull data for 2023 and 2022 for all US counties ------
+water_insecurity_2023 <- build_water_insecurity(2023)
+water_insecurity_2022 <- build_water_insecurity(2022)
diff --git a/data/curated/water-insecurity/instructions.md b/data/curated/water-insecurity/instructions.md
@@ -0,0 +1,30 @@
+## Prepare the dataset
+
+These instructions are for preparing a dataset using the R programming language.
+We hope to provide instructions for other programming languages eventually.
+
+If you have not yet set up your computer for submitting a dataset, please see the full instructions at <https://github.com/rfordatascience/tidytuesday/blob/main/.github/pr_instructions.md>.
+
+1.  `cleaning.R`: Modify the `cleaning.R` file to get and clean the data.
+    -   Write the code to download and clean the data in `cleaning.R`.
+    -   If you're getting the data from a github repo, remember to use the 'raw' version of the URL.
+    -   This script should result in one or more data.frames, with descriptive variable names (eg `players` and `teams`, not `df1` and `df2`).
+
+2.  `saving.R`: Use `saving.R` to save your datasets. This process creates both the `.csv` file(s) and the data dictionary template file(s) for your datasets. **Don't save the CSV files using a separate process because we also need the data dictionaries.**
+    -   Run the first line of `saving.R` to create the functions we'll use to save your dataset.
+    -   Provide the name of your directory as `dir_name`.
+    -   Use `ttsave()` for each dataset you created in `cleaning.R`, substituting the name for the dataset for `YOUR_DATASET_DF`.
+
+3.  `{dataset}.md`: Edit the `{dataset}.md` files to describe your datasets (where `{dataset}` is the name of the dataset). These files are created by `saving.R`. There should be one file for each of your datasets. You most likely only need to edit the "description" column to provide a description of each variable.
+
+4.  `intro.md`: Edit the `intro.md` file to describe your dataset. You don't need to add a `# Title` at the top; this is just a paragraph or two to introduce the week.
+
+5.  Find at least one image for your dataset. These often come from the article about your dataset. If you can't find an image, create an example data visualization, and save the images in your folder as `png` files.
+
+6.  `meta.yaml`: Edit `meta.yaml` to provide information about your dataset and how we can credit you. You can delete lines from the `credit` block that do not apply to you.
+
+### Submit your pull request with the data
+
+1.  Commit the changes with this folder to your branch. In RStudio, you can do this on the "Git" tab (the "Commit" button).
+
+2.  Submit a pull request to <https://github.com/rfordatascience/tidytuesday>. In R, you can do this with `usethis::pr_push()`, and then follow the instructions in your browser.
diff --git a/data/curated/water-insecurity/intro.md b/data/curated/water-insecurity/intro.md
@@ -0,0 +1,10 @@
+This week we're exploring water insecurity data featured in the article [Mapping water insecurity in R with tidycensus](https://waterdata.usgs.gov/blog/acs-maps/)!
+
+> Water insecurity can be influenced by number of social vulnerability indicators—from demographic characteristics to living conditions and socioeconomic status —that vary spatially across the U.S. This blog shows how the tidycensus package for R can be used to access U.S. Census Bureau data, including the American Community Surveys, as featured in the “Unequal Access to Water ” data visualization from the USGS Vizlab. It offers reproducible code examples demonstrating use of tidycensus for easy exploration and visualization of social vulnerability indicators in the Western U.S.
+
+Some questions to explore:
+
+- How does the lack of complete indoor plumbing compare between the 2023 and 2022 Census data? 
+- What counties have the greatest percent of households lacking plumbing?
+- Are there differences in indoor plumbing availability between Western U.S. and Eastern U.S. counties?
diff --git a/data/curated/water-insecurity/meta.yaml b/data/curated/water-insecurity/meta.yaml
@@ -0,0 +1,19 @@
+title: Water Insecurity
+article:
+  title: Mapping water insecurity in R with tidycensus
+  url: https://waterdata.usgs.gov/blog/acs-maps/
+data_source:
+  title: US Census Data from tidycensus
+  url: https://cran.r-project.org/package=tidycensus
+images:
+# Please include at least one image, and up to three images
+- file: https://waterdata.usgs.gov/blog/static/acs-maps/tidycensus-intro-banner.png
+  alt: >
+    Banner that displays three choropleth maps displaying percent Hispanic, median gross rent, and average household size using 2022 U.S. Census Bureau Data.
+credit:
+# We want to thank you for curating this dataset! If you do not want a 
+# particular type of credit, please delete the related line.
+  post: Niha Pereira
+  bluesky: https://bsky.app/profile/nnpereira
+  linkedin: https://www.linkedin.com/in/niha-pereira
+  github: https://github.com/nnpereira
diff --git a/data/curated/water-insecurity/saving.R b/data/curated/water-insecurity/saving.R
@@ -0,0 +1,10 @@
+# Run this first: it creates the functions (including ttsave()) used below to save each dataset.
+source("data/curated/curation_scripts.R")
+
+# Name of this dataset's folder inside "curated"; it is passed to every ttsave() call below.
+dir_name <- "water-insecurity"
+
+# Save each data.frame built in cleaning.R (run that script first so both objects exist).
+# One ttsave() call per dataset; per instructions.md this creates the CSV and data dictionary template.
+ttsave(water_insecurity_2022, dir_name = dir_name)
+ttsave(water_insecurity_2023, dir_name = dir_name)