carpentries-incubator · cforgaci · Apr 10, 2024 · Feb 18, 2024 · Feb 18, 2024 · Feb 18, 2024
diff --git a/episodes/01-intro-to-r.Rmd b/episodes/01-intro-to-r.Rmd
@@ -105,11 +105,11 @@ An alternative solution is to create the folders using R command `dir.create()`.
 In the console type: 
 
 ```{r create-directories}
-dir.create('data')
-dir.create('data_output')
-dir.create('documents')
-dir.create('fig_output')
-dir.create('scripts')
+dir.create("data")
+dir.create("data_output")
+dir.create("documents")
+dir.create("fig_output")
+dir.create("scripts")
 
 ```
 
@@ -218,7 +218,7 @@ We will however need to install the `here` package. To do so, please go to your
 script and type:
 
 ```{r install-here-package, eval=FALSE}
-install.packages('here')
+install.packages("here")
 ```
 
 :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: callout
@@ -311,8 +311,10 @@ In the script, we will write:
 
 ```{r download-files}
 # Download the data
-download.file('https://bit.ly/geospatial_data', 
-              here('data','gapminder_data.csv'))
+download.file(
+  "https://bit.ly/geospatial_data",
+  here("data", "gapminder_data.csv")
+)
 
 ```
 
@@ -337,9 +339,9 @@ will not cover these in the workshop.
 You can use R as calculator, you can for example write:
 
 ```{r calculator}
-1+100
-1*100
-1/100
+1 + 100
+1 * 100
+1 / 100
 
 ```
 
@@ -351,7 +353,7 @@ use them whenever we need to.
 We do this using the assignment operator `<-`, like this:
 
 ```{r asignment-operator}
-x <- 1/40
+x <- 1 / 40
 ```
 
 Notice that assignment does not print a value. Instead, we've stored it for later 

diff --git a/episodes/02-data-structures.Rmd b/episodes/02-data-structures.Rmd
@@ -211,11 +211,13 @@ You can reorder the categories using `factor()` function. This can be useful, fo
 
 ```{r factor-reorder1}
 nordic_cat <- factor(
-  nordic_cat, levels = c(
-    'Norway', 
-    'Denmark', 
-    'Sweden'
-  )) 
+  nordic_cat,
+  levels = c(
+    "Norway",
+    "Denmark",
+    "Sweden"
+  )
+)
 
 # now Norway will be the first category, Denmark second and Sweden third
 nordic_cat
@@ -230,12 +232,12 @@ we will use `fct_relevel()` function from `forcats` package to do the reordering
 library(forcats)
 
 nordic_cat <- fct_relevel(
-  nordic_cat, 
-  'Norway' , 
-  'Denmark', 
-  'Sweden'
-  ) # With this, Norway will be  first category, 
-    # Denmark second and Sweden third
+  nordic_cat,
+  "Norway",
+  "Denmark",
+  "Sweden"
+) # With this, Norway will be  first category,
+# Denmark second and Sweden third
 
 nordic_cat
 ```
@@ -263,13 +265,13 @@ outside of this set, it will become an unknown/missing value denoted by
 ```{r factor-missing-level}
 nordic_str
 nordic_cat2 <- factor(
-  nordic_str, 
-  levels = c('Norway', 'Denmark')
-  )
+  nordic_str,
+  levels = c("Norway", "Denmark")
+)
 
-# because we did not include Sweden in the list of 
+# because we did not include Sweden in the list of
 # factor levels, it has become NA.
-nordic_cat2 
+nordic_cat2
 ```
 ::::::::::::::::::::::::::::::::::::::::::::::::::::
 

diff --git a/episodes/03-explore-data.Rmd b/episodes/03-explore-data.Rmd
@@ -1,4 +1,4 @@
 ---
 title: 'Exploring Data Frames & Data frame Manipulation with dplyr '
 teaching: 10
 exercises: 2
@@ -95,7 +95,7 @@
 
 head(gapminder) # shows first 6  rows of the data set
 
-summary(gapminder) # basic statistical information about each column. 
+summary(gapminder) # basic statistical information about each column.
 # Information format differs by data type.
 
 nrow(gapminder) # returns number of rows in a dataset
@@ -110,9 +110,9 @@
 
 One handy way to access a column is using its name and a dollar sign `$`: 
 ```{r subset-dollar-sign}
-# This notation means: From dataset gapminder, give me column country. You can 
-# see that the column accessed in this way is just a vector of characters. 
-country_vec <- gapminder$country 
+# This notation means: From dataset gapminder, give me column country. You can
+# see that the column accessed in this way is just a vector of characters.
+country_vec <- gapminder$country
 
 head(country_vec)
 
@@ -128,7 +128,7 @@
 First, we will adapt our data set, by keeping only the columns we're interested in, using the `select()` function from the `dplyr` package:
 
 ```{r dplyr-select}
-year_country_gdp <- select(gapminder, year, country, gdpPercap) 
+year_country_gdp <- select(gapminder, year, country, gdpPercap)
 
 head(year_country_gdp)
 
@@ -145,8 +145,8 @@
 
 ```{r dplyr-pipe}
 
-year_country_gdp <- gapminder %>% 
-  select(year,country,gdpPercap)
+year_country_gdp <- gapminder %>%
+  select(year, country, gdpPercap)
 
 head(year_country_gdp)
 
@@ -160,8 +160,8 @@
 
 In the `gapminder` data set, we want to see the results from outside of Europe for the 21st century. 
 ```{r}
-year_country_gdp_euro <- gapminder %>% 
-  filter(continent != "Europe" & year >= 2000) %>% 
+year_country_gdp_euro <- gapminder %>%
+  filter(continent != "Europe" & year >= 2000) %>%
   select(year, country, gdpPercap)
 # '&' operator (AND) - both conditions must be met
 
@@ -181,9 +181,9 @@
 
 
 ```{r ex5, class.source="bg-info"}
-year_country_gdp_eurasia <- gapminder %>% 
-  filter(continent == "Europe" | continent == "Asia") %>% 
-  select(year, country, gdpPercap) 
+year_country_gdp_eurasia <- gapminder %>%
+  filter(continent == "Europe" | continent == "Asia") %>%
+  select(year, country, gdpPercap)
 # '|' operator (OR) - one of the conditions must be met
 
 nrow(year_country_gdp_eurasia)
@@ -215,10 +215,10 @@
 
 ```{r ex6 , class.source="bg-info"}
 gapminder %>%
-   group_by(country) %>%
-   summarize(avg_lifeExp=mean(lifeExp)) %>%
-   filter(avg_lifeExp == min(avg_lifeExp) | 
-            avg_lifeExp == max(avg_lifeExp) )
+  group_by(country) %>%
+  summarize(avg_lifeExp = mean(lifeExp)) %>%
+  filter(avg_lifeExp == min(avg_lifeExp) |
+    avg_lifeExp == max(avg_lifeExp))
 ```
 
 ### Multiple groups and summary variables
@@ -235,14 +235,14 @@
 On top of this, you can also make multiple summaries of those groups:
 ```{r dplyr-summ}
 gdp_pop_bycontinents_byyear <- gapminder %>%
-  group_by(continent,year) %>%
+  group_by(continent, year) %>%
   summarize(
     avg_gdpPercap = mean(gdpPercap),
     sd_gdpPercap = sd(gdpPercap),
     avg_pop = mean(pop),
     sd_pop = sd(pop),
     n_obs = n()
-    )
+  )
 
 ```
 
@@ -252,8 +252,8 @@
 ```{r dplyr-count}
 
 gapminder %>%
-    group_by(continent) %>%
-    count()
+  group_by(continent) %>%
+  count()
 ```
 
 
@@ -263,7 +263,7 @@
 
 ```{r dplyr-mutate}
 gapminder_gdp <- gapminder %>%
-  mutate(gdpBillion = gdpPercap*pop/10^9)
+  mutate(gdpBillion = gdpPercap * pop / 10^9)
 
 head(gapminder_gdp)