-
Notifications
You must be signed in to change notification settings - Fork 0
/
covidexample.R
103 lines (81 loc) · 3.15 KB
/
covidexample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
library(dplyr)
library(maps)
library(usmap)
library(ggplot2)
#' Download and combine the Census county age/sex population estimate files.
#'
#' One URL is built per code by replacing the literal `XX` placeholder in
#' `base_url` with the zero-padded two-digit code. Each URL is read as CSV;
#' codes whose file cannot be read are skipped (with a message) rather than
#' aborting the whole download. All tables that were read are stacked into a
#' single data frame.
#'
#' @param state_codes Integer codes to try. Defaults to `1:56`.
#' @param base_url URL (or path) template containing the placeholder `XX`.
#' @return A data frame holding the rows of every file that was read
#'   successfully; an empty data frame when none could be read.
download_files <- function(
    state_codes = 1:56,
    base_url = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/asrh/cc-est2019-agesex-XX.csv"
) {
  tables <- lapply(state_codes, function(code) {
    url <- gsub("XX", sprintf("%02d", code), base_url, fixed = TRUE)
    message("Reading file for code ", sprintf("%02d", code))
    tryCatch(
      read.csv(url, header = TRUE, as.is = TRUE),
      error = function(cond) {
        # Best effort: not every code in the range has a file, so report the
        # failure and move on instead of stopping.
        message(
          "Skipping code ", sprintf("%02d", code), ": ",
          conditionMessage(cond)
        )
        NULL
      }
    )
  })

  # Drop the failed reads, then bind everything in one call. This also works
  # when zero or one file was read, where a `2:length(...)` loop would not.
  tables <- Filter(Negate(is.null), tables)
  bind_rows(tables)
}
# Load the county population estimates and keep only the 2019 rows, which the
# data codes as YEAR == 12.
demog_dat <- download_files() %>%
  filter(YEAR == 12) %>%
  mutate(
    # Relabel the year code with the calendar year. This must be an
    # assignment; a bare `==` comparison would only print a logical vector
    # and leave YEAR unchanged.
    YEAR = 2019,
    # Combined county key: state code in the thousands, county code in the
    # low three digits.
    fips = 1000 * STATE + COUNTY
  )
# CDC table read straight from the API: add the combined county key and keep
# only the rows whose Indicator is the percentage distribution of COVID-19
# deaths.
covid_dat <- read.csv(
  "https://data.cdc.gov/api/views/k8wy-p9cg/rows.csv",
  header = TRUE,
  as.is = TRUE
) %>%
  mutate(fips = 1000 * FIPS.State + FIPS.County) %>%
  filter(Indicator %in% "Distribution of COVID-19 deaths (%)")

# County-level death counts from a local CSV; its county code column is
# copied to `fips` so it can serve as the join key.
cov_dat_2 <- read.csv(
  "./data/Provisional_COVID-19_Death_Counts_in_the_United_States_by_County.csv",
  header = TRUE,
  as.is = TRUE
) %>%
  mutate(fips = FIPS.County.Code)

# Keep every demographic row and attach the death counts where the county
# key matches.
dat <- demog_dat %>%
  left_join(cov_dat_2, by = "fips")
# Express each age-group total and each death count as a share of the
# county's POPESTIMATE.
# NOTE(review): if the column names denote age ranges, the list goes from
# 5-13 straight to 15-19 -- confirm that this gap is intended.
dat <- mutate(
  dat,
  # Age-group shares of the population estimate.
  u5frac        = UNDER5_TOT    / POPESTIMATE,
  age513frac    = AGE513_TOT    / POPESTIMATE,
  age1519frac   = AGE1519_TOT   / POPESTIMATE,
  age2024frac   = AGE2024_TOT   / POPESTIMATE,
  age2529frac   = AGE2529_TOT   / POPESTIMATE,
  age3034frac   = AGE3034_TOT   / POPESTIMATE,
  age3539frac   = AGE3539_TOT   / POPESTIMATE,
  age4044frac   = AGE4044_TOT   / POPESTIMATE,
  age4549frac   = AGE4549_TOT   / POPESTIMATE,
  age5054frac   = AGE5054_TOT   / POPESTIMATE,
  age5559frac   = AGE5559_TOT   / POPESTIMATE,
  age6064frac   = AGE6064_TOT   / POPESTIMATE,
  age6569frac   = AGE6569_TOT   / POPESTIMATE,
  age7074frac   = AGE7074_TOT   / POPESTIMATE,
  age7579frac   = AGE7579_TOT   / POPESTIMATE,
  age8084frac   = AGE8084_TOT   / POPESTIMATE,
  age85plusfrac = AGE85PLUS_TOT / POPESTIMATE,
  # Deaths relative to the population estimate; the last column is the
  # COVID share expressed as a percentage.
  totdeathfrac  = Deaths.from.All.Causes    / POPESTIMATE,
  covdeathfrac  = Deaths.involving.COVID.19 / POPESTIMATE,
  covdeathpct   = 100 * covdeathfrac
)
# One county is an outlier, with a COVID death rate of 3.8% of the
# population, almost double the second-highest county (1.98%). The next ones
# are 1.8%, 1.7% and 1.2%. The counties above 1.2% distort the colour scale
# of the map, so build a second variable in which every value above 1.2 is
# replaced by 1.3 and all other values are kept as they are.
dat <- dat %>%
  mutate(covdeathpcttrunc = ifelse(covdeathpct > 1.2, 1.3, covdeathpct))

# County map of the truncated percentage, filled from white to red.
plot_usmap(regions = "counties", data = dat, values = "covdeathpcttrunc") +
  # Earlier alternative scale, kept for reference:
  # scale_fill_continuous(low = 'white', high = 'red', name = "Pct of pop. killed by COVID", label = scales::comma)
  scale_colour_gradient(
    aesthetics = "fill",
    low = "white",
    high = "red",
    space = "Lab",
    na.value = "grey50",
    guide = "colourbar"
  )
# County map of the raw COVID death counts, filled from white to red.
# `values` must name a column of `dat`; the count column this script reads
# from the joined county data is `Deaths.involving.COVID.19`.
plot_usmap(
  data = dat,
  values = "Deaths.involving.COVID.19",
  regions = "counties"
) +
  scale_fill_continuous(
    low = "white",
    high = "red",
    name = "Number killed by COVID",
    # Spelled out in full instead of relying on partial matching of `label`.
    labels = scales::comma
  )