forked from dgrtwo/data-screencasts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nyc-squirrels.Rmd
115 lines (90 loc) · 2.54 KB
/
nyc-squirrels.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
---
title: "Untitled"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option `echo = TRUE` for every chunk in this document.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# Attach the tidyverse and use a light ggplot2 theme for every plot below.
library(tidyverse)
theme_set(theme_light())

# Read the squirrel data straight from the TidyTuesday repository.
squirrels_csv_url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-29/nyc_squirrels.csv"
nyc_squirrels <- readr::read_csv(squirrels_csv_url)
```
```{r}
# Number of rows per `zip_codes` value, most frequent first.
count(nyc_squirrels, zip_codes, sort = TRUE)
```
```{r}
# One point per row, plotted at its longitude/latitude.
ggplot(nyc_squirrels, aes(x = long, y = lat)) +
  geom_point()
# Per-hectare summary of rows with a recorded primary fur color:
# mean position, proportion whose primary color is "Gray", and row count.
by_hectare <- nyc_squirrels %>%
  filter(!is.na(primary_fur_color)) %>%
  group_by(hectare) %>%
  summarize(
    long = mean(long),
    lat = mean(lat),
    pct_gray = mean(primary_fur_color == "Gray", na.rm = TRUE),
    n = n()
  )
# Map of hectares with at least 10 rows: point size shows the row count,
# point color shows the proportion gray.
ggplot(
  filter(by_hectare, n >= 10),
  aes(x = long, y = lat, size = n, color = pct_gray)
) +
  geom_point() +
  theme_void()
# Proportion gray against latitude for hectares with at least 10 rows,
# with a smoothed trend line.
ggplot(filter(by_hectare, n >= 10), aes(x = lat, y = pct_gray)) +
  geom_point() +
  geom_smooth()
# Binomial regression of gray / non-gray counts per hectare on latitude.
# `n_gray` is recovered from the stored proportion; round() removes
# floating-point noise so the count is a whole number.
gray_by_hectare <- mutate(by_hectare, n_gray = round(pct_gray * n))

gray_by_hectare %>%
  glm(cbind(n_gray, n - n_gray) ~ lat, data = ., family = "binomial") %>%
  summary()
```
Squirrels may be more likely to be gray the farther north in the park you go, and more likely to be cinnamon the farther south.
```{r}
# Frequency of each highlight fur color, most common first.
count(nyc_squirrels, highlight_fur_color, sort = TRUE)

# Frequency of each combination of the three human-interaction columns.
count(nyc_squirrels, approaches, indifferent, runs_from, sort = TRUE)
```
Does a squirrel run away?
```{r}
# Binomial regression of `runs_from` on latitude, one observation per row.
runs_from_fit <- glm(runs_from ~ lat, data = nyc_squirrels, family = "binomial")
summary(runs_from_fit)
```
```{r}
library(sf)

# Spatial layer read with sf and used as the map backdrop below.
# NOTE(review): this path is local to one machine, so the document will not
# knit elsewhere until the data is placed there or the path is changed.
central_park_dir <- "~/Downloads/CentralAndProspectParks/"
central_park_sf <- read_sf(central_park_dir)
# Per-hectare summary: mean position plus the mean of each behavior column
# (the share of rows showing it, assuming those columns are logical).
# `n` is the number of rows in the hectare; it is kept as a grouping column
# so it survives the summary.
by_hectare <- nyc_squirrels %>%
  add_count(hectare) %>%
  # A missing `location` counts as "not above ground" rather than NA.
  mutate(above_ground = !is.na(location) & location == "Above Ground") %>%
  group_by(hectare, n) %>%
  # across() replaces the superseded summarize_at(vars(...), mean);
  # .groups = "drop" returns an ungrouped tibble without a separate ungroup().
  summarize(
    across(
      c(long, lat, approaches:runs_from, ends_with("ing"), above_ground),
      mean
    ),
    .groups = "drop"
  )
# Mean `runs_from` per hectare (hectares with at least 10 rows), drawn over
# the park layer: point size shows the row count, point color the mean.
ggplot(filter(by_hectare, n >= 10)) +
  geom_sf(data = central_park_sf) +
  geom_point(aes(x = long, y = lat, size = n, color = runs_from)) +
  theme_void() +
  scale_color_gradient2(
    low = "blue",
    mid = "pink",
    high = "red",
    midpoint = 0.3,
    labels = scales::percent
  ) +
  labs(
    title = "Squirrels in the northwest corner of Central Park are more likely to run away",
    color = "% of squirrels run",
    size = "# of squirrels"
  ) +
  # datum = NA suppresses the graticule.
  coord_sf(datum = NA)
```
```{r}
# Frequency of each `lanes` value in the park layer.
count(central_park_sf, lanes, sort = TRUE)

# Quick overlay: park layer with per-hectare points colored by `runs_from`.
ggplot() +
  geom_sf(data = central_park_sf) +
  geom_point(
    data = by_hectare,
    mapping = aes(x = long, y = lat, color = runs_from)
  ) +
  coord_sf(datum = NA)
```
```{r}
# Park layer with each feature colored by its `bicycle` attribute.
ggplot(central_park_sf) +
  geom_sf(aes(color = bicycle)) +
  coord_sf(datum = NA)
```