Skip to content

Commit

Permalink
lint chapters (skip 09-tutorial-link-point-to-census-geoid.Rmd)
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchellmanware committed Feb 1, 2024
1 parent bd9474d commit 6fd4407
Show file tree
Hide file tree
Showing 15 changed files with 599 additions and 523 deletions.
3 changes: 0 additions & 3 deletions 00-setup.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ install.packages("styler")
### Rmarkdown

```{R car}
summary(cars)
```

Expand All @@ -16,6 +15,4 @@ summary(cars)
You can label code chunks with hyphens but we don't recommend using underscores or spaces. Think "kebabs, not snakes".

```{R plot}
```
141 changes: 77 additions & 64 deletions 02-census-income.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,25 @@ The output file will contain one row per day between start_date and end_date for
install packages

```{r}
if(!require('shiny') || !require('tidycensus') || !require('tidyverse') || !require('viridis')) {
install.packages(c("shiny","tidycensus","tidyverse","viridis"))
if (!require("shiny") ||
!require("tidycensus") ||
!require("tidyverse") ||
!require("viridis")) {
install.packages(c("shiny", "tidycensus", "tidyverse", "viridis"))
}
if(!require('plotly')){
if (!require("plotly")) {
install.packages("plotly", type = "source")
}
```

```{r }
if(!require('ggplot2')) {
install.packages('ggplot2')
install.packages(c("maps","ggmap"))
if (!require("ggplot2")) {
install.packages("ggplot2")
install.packages(c("maps", "ggmap"))
}
if(!require('maps') || !require('ggmap')) {
install.packages(c("maps","ggmap"))
if (!require("maps") || !require("ggmap")) {
install.packages(c("maps", "ggmap"))
}
```

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
Expand All @@ -75,22 +73,22 @@ library(tidyverse)
library(viridis)
library(shiny)
library(plotly)
#Only load the census key at the first time. then we set install=true
#census_api_key( install=TRUE)
# Only load the census key at the first time. then we set install=true
first <- FALSE
if (first == TRUE) {
census_api_key(install = TRUE)
}
options(tigris_use_cache = TRUE)
```

```{R}
library(readr)
df <- read_csv("./dataset/ms_patient_pm_census_v2.csv",show_col_types = FALSE)
df <- read_csv("./dataset/ms_patient_pm_census_v2.csv", show_col_types = FALSE)
```

```{r}
dim(df)
colnames(df)
```

### use case:
Expand All @@ -108,23 +106,25 @@ colnames(df)
Create a character vector named income_code containing eight elements. Each element represents a variable code related to income.

```{R}
income_code <- c("C17002_001","C17002_002","C17002_003","C17002_004","C17002_005","C17002_006",
"C17002_007", "C17002_008")
#Assign the result of the get_acs function to the variable tarr. The function retrieves
#American Community Survey (ACS) data for the specified geography (tract), variables
#(the income_code vector), state (Massachusetts with state code 25), geometry (TRUE to
#include spatial information), survey ("acs5"), and year (2012).
tarr <- get_acs(geography = "tract", variables = income_code,
state = 25, geometry = FALSE,survey = "acs5", year = 2012)
#Write the census income data into csv file
#modify if (FALSE) to if (TRUE) if you run the first time
income_code <- c(
"C17002_001", "C17002_002", "C17002_003", "C17002_004", "C17002_005", "C17002_006",
"C17002_007", "C17002_008"
)
# Assign the result of the get_acs function to the variable tarr. The function retrieves
# American Community Survey (ACS) data for the specified geography (tract), variables
# (the income_code vector), state (Massachusetts with state code 25), geometry (TRUE to
# include spatial information), survey ("acs5"), and year (2012).
tarr <- get_acs(
geography = "tract", variables = income_code,
state = 25, geometry = FALSE, survey = "acs5", year = 2012
)
# Write the census income data into csv file
# modify if (FALSE) to if (TRUE) if you run the first time
if (FALSE) {
write.csv(tarr,"./dataset/income_MA_mult_incomes.csv")
write.csv(tarr, "./dataset/income_MA_mult_incomes.csv")
}
```

9. tarr column names: • Id -- census tract id • geoid, • name, • variable, • estimate, • moe
Expand All @@ -136,7 +136,6 @@ write.csv(tarr,"./dataset/income_MA_mult_incomes.csv")
```{r}
dim(tarr)
colnames(tarr)
```

### Join the dataframes by the census tract id
Expand All @@ -147,79 +146,94 @@ Join these two data frames

```{r}
library(dplyr)
merge <- FALSE
if (merge == TRUE) {
merged_frame <- df %>%
mutate(census_tract_id_2010 = as.character(census_tract_id_2010)) %>%
inner_join(tarr, by = c("GEOID" = "census_tract_id_2010"))
}
#merged_frame <- df %>%
# mutate(census_tract_id_2010 = as.character(census_tract_id_2010)) %>%
# inner_join(tarr, by = c("GEOID" = "census_tract_id_2010"))
#left join produce the same result as inner_join
# left join produce the same result as inner_join
left_merged_frame <- df %>%
mutate(census_tract_id_2010 = as.character(census_tract_id_2010)) %>%
left_join(tarr, by = c("census_tract_id_2010" = "GEOID" ))
left_join(tarr, by = c("census_tract_id_2010" = "GEOID"))
#Write the merged frame data into csv file
#modify if (FALSE) to if (TRUE) if you run the first time
# Write the merged frame data into csv file
# modify if (FALSE) to if (TRUE) if you run the first time
if (FALSE) {
write.csv(left_merged_frame,"./dataset/merged_income_MA_mult_incomes.csv")
write.csv(left_merged_frame, "./dataset/merged_income_MA_mult_incomes.csv")
}
```

```{r}
library(dplyr)
```

## The county with the highest income in MA by census data 2020 {-}

```{r}
# Fetch ACS data for Massachusetts counties
ma_counties <- get_acs(geography = "county", variables = "B19013_001", state = "MA", year = 2020)
ma_counties <- get_acs(
geography = "county",
variables = "B19013_001",
state = "MA",
year = 2020
)
# Sort the data by median household income in descending order
ma_counties_sorted <- ma_counties %>%
ma_counties_sorted <- ma_counties %>%
arrange(desc(estimate))
# Extract the county with the highest income
highest_income_county <- ma_counties_sorted$NAME[1]
# Print the county with the highest income
cat("The county with the highest income in Massachusetts is:", highest_income_county)
cat("The county with the highest income in Massachusetts is:",
highest_income_county)
```
### install packages for the plot

```{r}
if(!require('maps') || !require('ggmap')) {
install.packages(c("maps","ggmap"))
if (!require("maps") || !require("ggmap")) {
install.packages(c("maps", "ggmap"))
library(maps)
library(ggmap)
}
library(ggplot2)
```
```{r}
colnames(left_merged_frame)
```
### Draw a map with patients and income in color
```{r}
# Draw Massachusetts map
# Draw Massachusetts map
ma_map <- map_data("state", region = "massachusetts")
#modify column's name
colnames(left_merged_frame)[which(names(left_merged_frame) == "lon")] <- "longitude"
colnames(left_merged_frame)[which(names(left_merged_frame) =="lat")] <- "latitude"
# modify column's name
colnames(left_merged_frame)[which(names(left_merged_frame) == "lon")] <-
"longitude"
colnames(left_merged_frame)[which(names(left_merged_frame) == "lat")] <-
"latitude"
#Optional Filter the rows based on the date range Use the subset() function to
#filter the rows based on the date range:2012 -2017to match 5 year sensus data
# Optional: filter the rows by date range. Use the subset() function to
# keep rows from 2012 to 2017, matching the 5-year census data
filtered_df <- subset(left_merged_frame, date >= as.Date("2012-09-01") & date <= as.Date("2017-09-01"))
filtered_df <- subset(
left_merged_frame,
date >= as.Date("2012-09-01") & date <= as.Date("2017-09-01")
)
# Create a base plot for Massachusetts
p <- ggplot() +
geom_polygon(data = ma_map, aes(x = long, y = lat, group = group),
fill = "lightgray", color = "black") +
geom_polygon(
data = ma_map, aes(x = long, y = lat, group = group),
fill = "lightgray", color = "black"
) +
# Plot the data points
geom_point(data = filtered_df, aes(x = longitude, y = latitude, color = estimate), size = 3) +
geom_point(
data = filtered_df,
aes(x = longitude, y = latitude, color = estimate),
size = 3
) +
labs(title = "Income vs patient Map", x = "Longitude", y = "Latitude") +
scale_color_gradient(low = "blue", high = "red", name = "estimate") +
theme_minimal() +
Expand All @@ -228,7 +242,6 @@ p <- ggplot() +
# Print the map
print(p)
```


36 changes: 17 additions & 19 deletions 03-bivariate-choropleth_map.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,38 @@

```{r}
library("knitr")
knitr::opts_chunk$set(fig.align="center", fig.width=6, fig.height=6)
options(width=90)
knitr::opts_chunk$set(fig.align = "center", fig.width = 6, fig.height = 6)
options(width = 90)
```

Single-variate choropleth maps

```{r}
if(!require('latticeExtra')){
if (!require("latticeExtra")) {
install.packages("latticeExtra", type = "source")
}
if(!require('pals')){
if (!require("pals")) {
install.packages("pals", type = "source")
}
```

```{r}
```{r, eval = FALSE}
require(latticeExtra) # USCancerRates, mapplot
require(maps) # map
require(classInt) # classIntervals, findCols
require(grid) # viewport, pushViewport
require(pals) # brewer.blues, stevens.pinkgreen
#This block of code Can run at your locally PC/MAC but can not knit to server
# suppressWarnings(print(
# mapplot(rownames(USCancerRates) ~ log(rate.male) + log(rate.female),
# data = USCancerRates,
# colramp = brewer.blues,
# map = map("county", plot = FALSE, fill = TRUE,
# projection = "tetra"),
# breaks=classIntervals(log(USCancerRates$rate.female), n=3, style='quantile')$brks,
# scales = list(draw = FALSE))
# ))
# This block of code can run on your local PC/Mac but cannot be knit on the server
suppressWarnings(print(
mapplot(rownames(USCancerRates) ~ log(rate.male) + log(rate.female),
data = USCancerRates,
colramp = brewer.blues,
map = map("county", plot = FALSE, fill = TRUE,
projection = "tetra"),
breaks = classIntervals(log(USCancerRates$rate.female),
n = 3,
style = "quantile")$brks,
scales = list(draw = FALSE))
))
```
Loading

0 comments on commit 6fd4407

Please sign in to comment.