Skip to content

Commit f21f967

Browse files
committed
implement @gjanee suggestions
1 parent 2ddb7a9 commit f21f967

File tree

2 files changed

+26
-15
lines changed

2 files changed

+26
-15
lines changed

_empty-hands-on.qmd

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,7 @@ Loading the necessary packages. DuckDB has its own R package that is mostly a wr
2727
#| message: false
2828
2929
library(tidyverse)
30-
library(dbplyr) # to query databases in a tidyverse style manner
31-
library(DBI) # to connect to databases
32-
# install.packages("duckdb") # install this package to get duckDB API
33-
library(duckdb) # Specific to duckDB
30+
3431
```
3532

3633
Import the csv files with the bird species information:
@@ -113,6 +110,15 @@ Ideally we would like the scientific names...
113110

114111
## Let's connect to our first database
115112

113+
```{r}
114+
#| message: false
115+
116+
library(dbplyr) # to query databases in a tidyverse style manner
117+
library(DBI) # to connect to databases
118+
# install.packages("duckdb") # install this package to get duckDB API
119+
library(duckdb) # Specific to duckDB
120+
```
121+
116122
### Load the bird database
117123

118124
This database has been built from the csv files we just analyzed, so the data should be very similar. Note that we did not say identical; more on this in the last section:

hands-on.qmd

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,6 @@ Loading the necessary packages. DuckDB has its own R package that is mostly a wr
2727
#| message: false
2828
2929
library(tidyverse)
30-
library(dbplyr) # to query databases in a tidyverse style manner
31-
library(DBI) # to connect to databases
32-
# install.packages("duckdb") # install this package to get duckDB API
33-
library(duckdb) # Specific to duckDB
3430
```
3531

3632
Import the csv files with the bird species information:
@@ -108,14 +104,14 @@ Where W is the width and L the length of the egg
108104
We can use mutate to do so:
109105

110106
```{r}
111-
eggs_area_df <- eggs_csv %>%
107+
eggs_volume_df <- eggs_csv %>%
112108
mutate(egg_volume = pi/6*Width^2*Length)
113109
```
114110

115111
Now let's join this information to the nest table, and average by species
116112

117113
```{r}
118-
species_egg_volume_avg <- left_join(nests_csv, eggs_area_df, by="Nest_ID") %>%
114+
species_egg_volume_avg <- left_join(eggs_volume_df, nests_csv, by="Nest_ID") %>%
119115
group_by(Species) %>%
120116
summarise(egg_volume_avg = mean(egg_volume, na.rm = TRUE)) %>%
121117
arrange(desc(egg_volume_avg)) %>%
@@ -127,15 +123,24 @@ species_egg_volume_avg
127123
Ideally we would like the scientific names...
128124

129125
```{r}
130-
species_egg_area_avg <- species_study %>%
126+
species_egg_vol_avg <- species_study %>%
131127
inner_join(species_egg_volume_avg, by = join_by(Code == Species))
132128
133-
species_egg_area_avg
129+
species_egg_vol_avg
134130
```
135131

136132

137133
## Let's connect to our first database
138134

135+
```{r}
136+
#| message: false
137+
138+
library(dbplyr) # to query databases in a tidyverse style manner
139+
library(DBI) # to connect to databases
140+
# install.packages("duckdb") # install this package to get duckDB API
141+
library(duckdb) # Specific to duckDB
142+
```
143+
139144
### Load the bird database
140145

141146
This database has been built from the csv files we just analyzed, so the data should be very similar. Note that we did not say identical; more on this in the last section:
@@ -245,7 +250,7 @@ Compute the volume using the same code as previously!! Yes, you can use mutate t
245250

246251
```{r}
247252
# Compute the egg volume
248-
eggs_area_db <- eggs_db %>%
253+
eggs_volume_db <- eggs_db %>%
249254
mutate(egg_volume = pi/6*Width^2*Length)
250255
```
251256

@@ -258,7 +263,7 @@ Now let's join this information to the nest table, and average by species
258263

259264
```{r}
260265
# Join the egg and nest tables to compute average
261-
species_egg_volume_avg_db <- left_join(nests_db, eggs_area_db, by="Nest_ID") %>%
266+
species_egg_volume_avg_db <- left_join(nests_db, eggs_volume_db, by="Nest_ID") %>%
262267
group_by(Species) %>%
263268
summarise(egg_volume_avg = mean(egg_volume, na.rm = TRUE)) %>%
264269
arrange(desc(egg_volume_avg)) %>%
@@ -271,7 +276,7 @@ species_egg_volume_avg_db
271276
What does this SQL query look like?
272277

273278
```{r}
274-
species_egg_volume_avg_db <- left_join(nests_db, eggs_area_db, by="Nest_ID") %>%
279+
species_egg_volume_avg_db <- left_join(eggs_volume_db, nests_db, by="Nest_ID") %>%
275280
group_by(Species) %>%
276281
summarise(egg_volume_avg = mean(egg_volume, na.rm = TRUE)) %>%
277282
arrange(desc(egg_volume_avg)) %>%

0 commit comments

Comments
 (0)