From 2e928b3127170247f14522d02ff70c84b974be94 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 15:30:16 +0100 Subject: [PATCH 01/11] swap pros and cons in table console vs script --- episodes/01-intro-to-r.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/episodes/01-intro-to-r.Rmd b/episodes/01-intro-to-r.Rmd index 9593a135..320ac9a8 100644 --- a/episodes/01-intro-to-r.Rmd +++ b/episodes/01-intro-to-r.Rmd @@ -148,8 +148,8 @@ Each of the modes o interactions has its advantages and drawbacks. | | Console | R script| |--------|---------|---------| -|**Pros**|Immediate results|Work lost once you close RStudio | -|**Cons**|Complete record of your work |Messy if you just want to print things out| +|**Pros**|Immediate results| Complete record of your work | +|**Cons**| Work lost once you close RStudio | Messy if you just want to print things out| From e4b8e747eedf5d281994bf6c7200672a130c0b43 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 16:03:21 +0100 Subject: [PATCH 02/11] remove episodes from path. The tutorial doesn't point to creating this directory. --- episodes/01-intro-to-r.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/episodes/01-intro-to-r.Rmd b/episodes/01-intro-to-r.Rmd index 320ac9a8..cccdbb1d 100644 --- a/episodes/01-intro-to-r.Rmd +++ b/episodes/01-intro-to-r.Rmd @@ -312,7 +312,7 @@ In the script, we will write: ```{r download-files} # Download the data download.file('https://bit.ly/geospatial_data', - here('episodes', 'data','gapminder_data.csv')) + here('data','gapminder_data.csv')) ``` From b1fe2e4606f92f7a6b85d171421eb73d5e50c5c8 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 16:08:16 +0100 Subject: [PATCH 03/11] fix typo in function name. 
--- episodes/03-explore-data.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/episodes/03-explore-data.Rmd b/episodes/03-explore-data.Rmd index 5fab2d56..6ba0e7d5 100644 --- a/episodes/03-explore-data.Rmd +++ b/episodes/03-explore-data.Rmd @@ -68,7 +68,7 @@ For example, here is a figure depicting a data frame comprising a numeric, a cha We're gonna read in the `gapminder` data set with information about countries' size, GDP and average life expectancy in different years. ```{r reading-data} -gapminder <- read_csv("data/gapminder_data.csv") +gapminder <- read.csv("data/gapminder_data.csv") ``` From 8a58a2efffc4d6dd6d8a5ff5a8f173a75272116c Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 16:50:11 +0100 Subject: [PATCH 04/11] shorten code lines. move comments to top --- episodes/02-data-structures.Rmd | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/episodes/02-data-structures.Rmd b/episodes/02-data-structures.Rmd index bee71477..b623959e 100644 --- a/episodes/02-data-structures.Rmd +++ b/episodes/02-data-structures.Rmd @@ -65,13 +65,16 @@ You can create a vector with a `c()` function. ```{r vectors} -numeric_vector <- c(2, 6, 3) # vector of numbers - numeric data type. +# vector of numbers - numeric data type. +numeric_vector <- c(2, 6, 3) numeric_vector -character_vector <- c('banana', 'apple', 'orange') # vector of words - or strings of characters- character data type +# vector of words - or strings of characters- character data type +character_vector <- c('banana', 'apple', 'orange') character_vector -logical_vector <- c(TRUE, FALSE, TRUE) # vector of logical values (is something true or false?)- logical data type. +# vector of logical values (is something true or false?)- logical data type. 
+logical_vector <- c(TRUE, FALSE, TRUE) logical_vector ``` From d2ded892c9cd903d0d201bb81183aaf777d458eb Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 17:09:22 +0100 Subject: [PATCH 05/11] split code and comments for readability --- episodes/02-data-structures.Rmd | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/episodes/02-data-structures.Rmd b/episodes/02-data-structures.Rmd index b623959e..5e44f8eb 100644 --- a/episodes/02-data-structures.Rmd +++ b/episodes/02-data-structures.Rmd @@ -124,7 +124,9 @@ First, let's try to calculate mean for the values in this vector ```{r remove-na1} mean(with_na) # mean() function cannot interpret the missing values -mean(with_na, na.rm = T) # You can add the argument na.rm=TRUE to calculate the result while ignoring the missing values. +# You can add the argument na.rm=TRUE to calculate the result while +# ignoring the missing values. +mean(with_na, na.rm = T) ``` However, sometimes, you would like to have the `NA` @@ -133,9 +135,11 @@ For this you need to identify which elements of the vector hold missing values with `is.na()` function. ```{r remove-na2} -is.na(with_na) # This will produce a vector of logical values, stating if a statement 'This element of the vector is a missing value' is true or not +is.na(with_na) # This will produce a vector of logical values, +# stating if a statement 'This element of the vector is a missing value' +# is true or not -!is.na(with_na) # # The ! operator means negation ,i.e. not is.na(with_na) +!is.na(with_na) # The ! operator means negation, i.e. not is.na(with_na) ``` @@ -145,7 +149,8 @@ Sub-setting in `R` is done with square brackets`[ ]`. 
```{r remove-na3} -without_na <- with_na[ !is.na(with_na) ] # this notation will return only the elements that have TRUE on their respective positions +without_na <- with_na[ !is.na(with_na) ] # this notation will return only +# the elements that have TRUE on their respective positions without_na @@ -173,7 +178,8 @@ known as levels. nordic_str <- c('Norway', 'Sweden', 'Norway', 'Denmark', 'Sweden') nordic_str # regular character vectors printed out -nordic_cat <- factor(nordic_str) # factor() function converts a vector to factor data type +# factor() function converts a vector to factor data type +nordic_cat <- factor(nordic_str) nordic_cat # With factors, R prints out additional information - 'Levels' ``` @@ -204,7 +210,13 @@ displayed in a plot or which category is taken as a baseline in a statistical mo You can reorder the categories using `factor()` function. This can be useful, for instance, to select a reference category (first level) in a regression model or for ordering legend items in a plot, rather than using the default category systematically (i.e. based on alphabetical order). ```{r factor-reorder1} -nordic_cat <- factor(nordic_cat, levels = c('Norway' , 'Denmark', 'Sweden')) # now Norway should be the first category, Denmark second and Sweden third +nordic_cat <- factor( + nordic_cat, levels = c( + 'Norway', + 'Denmark', + 'Sweden' + ) + ) # now Norway will be the first category, Denmark second and Sweden third nordic_cat ``` @@ -215,7 +227,8 @@ There is more than one way to reorder factors. Later in the lesson, we will use `fct_relevel()` function from `forcats` package to do the reordering. 
```{r factor-reorder2} -# nordic_cat <- fct_relevel(nordic_cat, 'Norway' , 'Denmark', 'Sweden') # now Norway should be the first category, Denmark second and Sweden third +nordic_cat <- fct_relevel(nordic_cat, 'Norway' , 'Denmark', 'Sweden') # With +# this, Norway will be first category, Denmark second and Sweden third nordic_cat ``` @@ -243,7 +256,8 @@ outside of this set, it will become an unknown/missing value detonated by ```{r factor-missing-level} nordic_str nordic_cat2 <- factor(nordic_str, levels = c('Norway', 'Denmark')) -nordic_cat2 # since we have not included Sweden in the list of factor levels, it has become NA. +nordic_cat2 # since we have not included Sweden in the list of factor levels, +# it has become NA. ``` :::::::::::::::::::::::::::::::::::::::::::::::::::: From fbcbe087fc1a9d783c332d1b9b7d0e327f5ba3ad Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 17:29:53 +0100 Subject: [PATCH 06/11] shortern code line and comments --- episodes/02-data-structures.Rmd | 26 +++++++++++++++++++------- episodes/03-explore-data.Rmd | 7 +++++-- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/episodes/02-data-structures.Rmd b/episodes/02-data-structures.Rmd index 5e44f8eb..6071354e 100644 --- a/episodes/02-data-structures.Rmd +++ b/episodes/02-data-structures.Rmd @@ -215,9 +215,9 @@ nordic_cat <- factor( 'Norway', 'Denmark', 'Sweden' - ) - ) # now Norway will be the first category, Denmark second and Sweden third + )) +# now Norway will be the first category, Denmark second and Sweden third nordic_cat ``` @@ -227,8 +227,15 @@ There is more than one way to reorder factors. Later in the lesson, we will use `fct_relevel()` function from `forcats` package to do the reordering. 
```{r factor-reorder2} -nordic_cat <- fct_relevel(nordic_cat, 'Norway' , 'Denmark', 'Sweden') # With -# this, Norway will be first category, Denmark second and Sweden third +library(forcats) + +nordic_cat <- fct_relevel( + nordic_cat, + 'Norway' , + 'Denmark', + 'Sweden' + ) # With this, Norway will be first category, + # Denmark second and Sweden third nordic_cat ``` @@ -255,9 +262,14 @@ outside of this set, it will become an unknown/missing value detonated by ```{r factor-missing-level} nordic_str -nordic_cat2 <- factor(nordic_str, levels = c('Norway', 'Denmark')) -nordic_cat2 # since we have not included Sweden in the list of factor levels, -# it has become NA. +nordic_cat2 <- factor( + nordic_str, + levels = c('Norway', 'Denmark') + ) + +# because we did not include Sweden in the list of +# factor levels, it has become NA. +nordic_cat2 ``` :::::::::::::::::::::::::::::::::::::::::::::::::::: diff --git a/episodes/03-explore-data.Rmd b/episodes/03-explore-data.Rmd index 6ba0e7d5..436d7ef7 100644 --- a/episodes/03-explore-data.Rmd +++ b/episodes/03-explore-data.Rmd @@ -108,7 +108,9 @@ When you're analyzing a data set, you often need to access its specific columns. One handy way to access a column is using it's name and a dollar sign `$`: ```{r subset-dollar-sign} -country_vec <- gapminder$country # Notation means: From dataset gapminder, give me column country. You can see that the column accessed in this way is just a vector of characters. +# This notation means: From dataset gapminder, give me column country. You can +# see that the column accessed in this way is just a vector of characters. +country_vec <- gapminder$country head(country_vec) @@ -157,7 +159,8 @@ We already know how to select only the needed columns. But now, we also want to In the `gapminder` data set, we want to see the results from outside of Europe for the 21st century. 
```{r} year_country_gdp_euro <- gapminder %>% - filter(continent != "Europe" & year >= 2000) %>% # & operator (AND) - both conditions must be met + filter(continent != "Europe" & year >= 2000) %>% # & operator (AND) - both + # conditions must be met select(year, country, gdpPercap) head(year_country_gdp_euro) From 72f96b0fcbb1ebbf3d1da6b1f435106226ddff62 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 19:27:28 +0100 Subject: [PATCH 07/11] shorten line in codeblock --- episodes/03-explore-data.Rmd | 22 ++++--- episodes/04-intro-to-visualisation.Rmd | 88 +++++++++++++++----------- 2 files changed, 65 insertions(+), 45 deletions(-) diff --git a/episodes/03-explore-data.Rmd b/episodes/03-explore-data.Rmd index 436d7ef7..489b0826 100644 --- a/episodes/03-explore-data.Rmd +++ b/episodes/03-explore-data.Rmd @@ -59,7 +59,7 @@ Because columns are vectors, each column must contain a **single type of data** For example, here is a figure depicting a data frame comprising a numeric, a character, and a logical vector. ![](fig/data-frame.svg) -
*Source*:[Data Carpentry R for Social Scientists ](https://datacarpentry.org/r-socialsci/02-starting-with-data/index.html#what-are-data-frames-and-tibbles) +
*Source*: [Data Carpentry R for Social Scientists ](https://datacarpentry.org/r-socialsci/02-starting-with-data/index.html#what-are-data-frames-and-tibbles) ## Reading data @@ -92,9 +92,11 @@ There are multiple ways to explore a data set. Here are just a few examples: ```{r} -head(gapminder) # see first 6 rows of the data set -summary(gapminder) # gives basic statistical information about each column. Information format differes by data type. +head(gapminder) # shows first 6 rows of the data set + +summary(gapminder) # basic statistical information about each column. +# Information format differs by data type. nrow(gapminder) # returns number of rows in a dataset @@ -108,7 +108,9 @@ When you're analyzing a data set, you often need to access its specific columns. One handy way to access a column is using it's name and a dollar sign `$`: ```{r subset-dollar-sign} -country_vec <- gapminder$country # Notation means: From dataset gapminder, give me column country. You can see that the column accessed in this way is just a vector of characters. +# This notation means: From dataset gapminder, give me column country. You can +# see that the column accessed in this way is just a vector of characters. +country_vec <- gapminder$country head(country_vec) @@ -157,7 +159,8 @@ We already know how to select only the needed columns. But now, we also want to In the `gapminder` data set, we want to see the results from outside of Europe for the 21st century. ```{r} year_country_gdp_euro <- gapminder %>% - filter(continent != "Europe" & year >= 2000) %>% # & operator (AND) - both conditions must be met + filter(continent != "Europe" & year >= 2000) %>% # & operator (AND) - both + # conditions must be met select(year, country, gdpPercap) head(year_country_gdp_euro) From 72f96b0fcbb1ebbf3d1da6b1f435106226ddff62 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 19:27:28 +0100 Subject: [PATCH 07/11] shorten line in codeblock --- episodes/03-explore-data.Rmd | 22 ++++--- episodes/04-intro-to-visualisation.Rmd | 88 +++++++++++++++----------- 2 files changed, 65 insertions(+), 45 deletions(-) diff --git a/episodes/03-explore-data.Rmd b/episodes/03-explore-data.Rmd index 436d7ef7..489b0826 100644 --- a/episodes/03-explore-data.Rmd +++ b/episodes/03-explore-data.Rmd @@ -59,7 +59,7 @@ Because columns are vectors, each column must contain a **single type of data** For example, here is a figure depicting a data frame comprising a numeric, a character, and a logical vector. ![](fig/data-frame.svg) -
set + summarize(avg_gdpPercap = mean(gdpPercap)) # create basic stats ``` @@ -214,7 +217,8 @@ Calculate the average life expectancy per country. Which country has the longest gapminder %>% group_by(country) %>% summarize(avg_lifeExp=mean(lifeExp)) %>% - filter(avg_lifeExp == min(avg_lifeExp) | avg_lifeExp == max(avg_lifeExp)) + filter(avg_lifeExp == min(avg_lifeExp) | + avg_lifeExp == max(avg_lifeExp) ) ``` ### Multiple groups and summary variables diff --git a/episodes/04-intro-to-visualisation.Rmd b/episodes/04-intro-to-visualisation.Rmd index 60f9b941..4530d441 100644 --- a/episodes/04-intro-to-visualisation.Rmd +++ b/episodes/04-intro-to-visualisation.Rmd @@ -48,24 +48,24 @@ After completing this episode, participants should be able to… # [Introduction to Visualisation](https://datacarpentry.org/r-intro-geospatial/07-plot-ggplot2/index.html) -The package `ggplot2` is a powerful plotting system. We will start with an introduction of key -features of `ggplot2`. In the following parts of this workshop, you will -use this package to visualize geospatial data. `gg` stands for grammar -of graphics, the idea that three components are needed to create a graph: +The package `ggplot2` is a powerful plotting system. We will start with an introduction of key features of `ggplot2`. `gg` stands for grammar of graphics. The idea behind it is that the following three components are needed to create a graph: - data, - aesthetics - a coordinate system on which we map the data (what is represented on x axis, what on y axis), and - geometries - visual representation of the data (points, bars, etc.) -Fun part about `ggplot2` is that you can add layers to -the plot to provide more information and to make it more beautiful. 
-First, lets plot the distribution of life expectancy in the `gapminder` dataset: +In the following parts of this workshop, you will use this package to visualize geospatial data. First, lets plot the distribution of life expectancy in the `gapminder` dataset: ```{r ggplot} - ggplot(data = gapminder, aes(x = lifeExp) ) + # aesthetics layer - geom_histogram() # geometry layer +library(ggplot2) + +ggplot(data = gapminder, # data + aes(x = lifeExp) # aesthetics layer + ) + +geom_histogram() # geometry layer ``` @@ -78,10 +78,9 @@ Let's create another plot, this time only on a subset of observations: ```{r ggplot-col} gapminder %>% # we select a data set - filter(year == 2007 & - continent == 'Americas') %>% # and filter it to keep only one year and one continent + filter(year == 2007 & continent == 'Americas') %>% # filter to keep one year and one continent ggplot(aes(x = country, y = gdpPercap)) + # the x and y axes represent values of columns - geom_col() # we select a column graph as a geometry + geom_col() # we select a column graph as a geometry ``` Now, you can iteratively improve how the plot looks like. For example, @@ -92,7 +91,7 @@ gapminder %>% filter(year == 2007, continent == 'Americas') %>% ggplot(aes(x = country, y = gdpPercap)) + - geom_col()+ + geom_col() + coord_flip() # flip axes ``` @@ -123,11 +122,14 @@ gapminder %>% filter(year == 2007, continent == 'Americas') %>% mutate(country = fct_reorder(country, gdpPercap )) %>% - ggplot(aes(x = country, y = gdpPercap, fill = lifeExp )) + # fill argument for colouring surfaces, colour for points and lines - geom_col()+ + ggplot(aes( + x = country, + y = gdpPercap, + fill = lifeExp # use 'fill' for surfaces; 'colour' for points and lines + )) + + geom_col() + coord_flip() - ``` We can also adapt the colour scale. 
Common choice that is used for its @@ -138,11 +140,11 @@ readability and colorblind-proofness are the palettes available in the gapminder %>% filter(year == 2007, continent == 'Americas') %>% - mutate(country = fct_reorder(country, gdpPercap )) %>% - ggplot(aes(x = country, y = gdpPercap, fill = lifeExp )) + - geom_col()+ - coord_flip()+ - scale_fill_viridis_c() # _c stands for continuous scale + mutate(country = fct_reorder(country, gdpPercap)) %>% + ggplot(aes(x = country, y = gdpPercap, fill = lifeExp)) + + geom_col() + + coord_flip() + + scale_fill_viridis_c() # _c stands for continuous scale ``` @@ -154,16 +156,23 @@ p <- # this time let's save the plot in an object gapminder %>% filter(year == 2007 & continent == 'Americas') %>% - mutate(country = fct_reorder(country, gdpPercap ), - lifeExpCat = if_else(lifeExp >= mean(lifeExp), 'high', 'low')) %>% + mutate(country = fct_reorder(country, gdpPercap), + lifeExpCat = if_else( + lifeExp >= mean(lifeExp), + 'high', + 'low')) %>% ggplot(aes(x = country, y = gdpPercap, fill = lifeExpCat)) + - geom_col()+ - coord_flip()+ - scale_fill_manual(values = c('light blue', 'orange')) # customize the colours of the fill aesthetic + geom_col() + + coord_flip() + + scale_fill_manual(values = c( + 'light blue', + 'orange' + ) # customize the colors + ) ``` -Since we saved a plot as an object, nothing has been printed out. Just +Since we saved a plot as an object `p`, nothing has been printed out. Just like with any other object in `R`, if you want to see it, you need to call it. @@ -177,12 +186,12 @@ Now we can make use of the saved object and add things to it. Let's also give it a title and name the axes: ```{r ggplot-titles} -p <- - p + +p <- p + ggtitle('GDP per capita in Americas', subtitle = 'Year 2007') + xlab('Country')+ ylab('GDP per capita') +# show plot p ``` @@ -193,9 +202,12 @@ p Once we are happy with our plot we can save it in a format of our choice. Remember to save it in the dedicated folder. 
-```{r save-plot} +```{r save-plot, eval=FALSE} ggsave(plot = p, - filename = here('fig_output','plot_americas_2007.pdf')) # By default, ggsave() saves the last displayed plot, but you can also explicitly name the plot you want to save + filename = here('fig_output','plot_americas_2007.pdf') + ) +# By default, ggsave() saves the last displayed plot, but +# you can also explicitly name the plot you want to save ``` @@ -220,13 +232,17 @@ Another output of your work you want to save is a cleaned data set. In your analysis, you can then load directly that data set. Let's say we want to save the data only for Americas: -```{r writing-data} +```{r writing-data, eval=FALSE} gapminder_amr_2007 <- gapminder %>% filter(year == 2007 & continent == 'Americas') %>% - mutate(country_reordered = fct_reorder(country, gdpPercap ), - lifeExpCat = if_else(lifeExp >= mean(lifeExp), 'high', 'low')) - -write.csv(gapminder_amr_2007, here('data_output', 'gapminder_americas_2007.csv'), row.names=FALSE) + mutate(country_reordered = fct_reorder(country, gdpPercap), + lifeExpCat = if_else(lifeExp >= mean(lifeExp), 'high', 'low') + ) + +write.csv(gapminder_amr_2007, + here('data_output', 'gapminder_americas_2007.csv'), + row.names=FALSE + ) ``` ::::::::::::::::::::::::::::::::::::: keypoints From 007fe5f152490efbdb9ac784127767287f0f983f Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 19:44:09 +0100 Subject: [PATCH 08/11] shorten comments for readability --- episodes/04-intro-to-visualisation.Rmd | 10 ++++++++-- episodes/09-open-and-plot-vector-layers.Rmd | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/episodes/04-intro-to-visualisation.Rmd b/episodes/04-intro-to-visualisation.Rmd index 4530d441..188b74ac 100644 --- a/episodes/04-intro-to-visualisation.Rmd +++ b/episodes/04-intro-to-visualisation.Rmd @@ -57,10 +57,16 @@ The package `ggplot2` is a powerful plotting system. 
We will start with an intro A fun part about `ggplot2` is that you can add layers to the plot to provide more information and to make it more beautiful. -In the following parts of this workshop, you will use this package to visualize geospatial data. First, lets plot the distribution of life expectancy in the `gapminder` dataset: +In the following parts of this workshop, you will use this package to visualize geospatial data. First, make sure that you have the following packages loaded. + +```{r load-pkgs, eval=FALSE} +library(tidyverse) +library(terra) +``` + +Now, let's plot the distribution of life expectancy in the `gapminder` dataset: ```{r ggplot} -library(ggplot2) ggplot(data = gapminder, # data aes(x = lifeExp) # aesthetics layer diff --git a/episodes/09-open-and-plot-vector-layers.Rmd b/episodes/09-open-and-plot-vector-layers.Rmd index 3b8bcc40..22e97d18 100644 --- a/episodes/09-open-and-plot-vector-layers.Rmd +++ b/episodes/09-open-and-plot-vector-layers.Rmd @@ -141,7 +141,7 @@ Now, let's plot this shapefile. 
You are already familiar with the `ggplot2` pack ggplot(data = boundary_Delft) + geom_sf(size = 3, color = "black", fill = "cyan1") + labs(title = "Delft Administrative Boundary") + - coord_sf(datum = st_crs(28992)) # this is needed to display the axes in meters + coord_sf(datum = st_crs(28992)) # displays the axes in meters ``` ::::::::::::::::::::::::::::::::::::: challenge From 8342344f5b1fa6231119916cbd2268d6a2314ff9 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 19:57:11 +0100 Subject: [PATCH 09/11] remove depricated argument 'size' in theme --- .../10-explore-and-plot-by-vector-layer-attributes.Rmd | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd b/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd index ca18aa96..4ca85f54 100644 --- a/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd +++ b/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd @@ -186,7 +186,9 @@ Now we can plot only the cycleways. 
```{r fig.cap="Map of cycleways in Delft."} ggplot(data = cycleway_Delft) + geom_sf() + - labs(title = "Slow mobility network in Delft", subtitle = "Cycleways") + + labs(title = "Slow mobility network in Delft", + subtitle = "Cycleways" + ) + coord_sf(datum = st_crs(28992)) ``` @@ -371,13 +373,17 @@ p1 <- ggplot(data = lines_Delft_selection) + labs(title = "Mobility network of Delft", subtitle = "Roads & Cycleways - Default Legend") + coord_sf(datum = st_crs(28992)) + +# show plot p1 ``` ```{r fig.cap="Map of the mobility network in Delft with large-font and border around the legend."} p2 <- p1 + theme(legend.text = element_text(size = 20), - legend.box.background = element_rect(size = 1)) + legend.box.background = element_rect(linewidth = 1)) + +# show plot p2 ``` From 879775a7a23b2052991e568d78a8a6375bf79e39 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 20:07:31 +0100 Subject: [PATCH 10/11] shorten line in challenge codeblocks --- ...re-and-plot-by-vector-layer-attributes.Rmd | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd b/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd index 4ca85f54..bf947df7 100644 --- a/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd +++ b/episodes/10-explore-and-plot-by-vector-layer-attributes.Rmd @@ -128,8 +128,8 @@ head(point_Delft) We can increase the number of rows with the n argument (e.g., `head(n = 10)` to show 10 rows) until we see at least three distinct values in the leisure column. Note that printing an `sf` object will also display the first 10 rows. 
```{r} -head(point_Delft, 10) # you might be lucky to see three distinct values -# point_Delft +head(point_Delft, 10) +# you might be lucky to see three distinct values ``` We have our answer (`sports_centre` is the third value), but in general this is not a good approach as the first rows might still have many `NA`s and three distinct values might still not be present in the first `n` rows of the data frame. To remove `NA`s, we can use the function `na.omit()` on the leisure column to remove `NA`s completely. Note that we use the `$` operator to examine the content of a single variable. @@ -141,7 +141,8 @@ head(na.omit(point_Delft$leisure)) # this is better To show only unique values, we can use the `levels()` function on a factor to only see the first occurrence of each distinct value. Note `NA`s are dropped in this case and that we get the first three of the unique alphabetically ordered values. ```{r} -head(levels(factor(point_Delft$leisure)), n = 3) # this is even better +head(levels(factor(point_Delft$leisure)), n = 3) +# this is even better ``` 3. To see a list of all attribute names, we can use the `names()` function. 
@@ -239,7 +240,9 @@ nrow(motorway_Delft) ```{r} ggplot(data = motorway_Delft) + geom_sf(linewidth = 1.5) + - labs(title = "Fast mobility network", subtitle = "Motorways") + + labs(title = "Fast mobility network", + subtitle = "Motorways" + ) + coord_sf(datum = st_crs(28992)) ``` @@ -260,7 +263,9 @@ nrow(pedestrian_Delft) ```{r} ggplot() + geom_sf(data = pedestrian_Delft) + - labs(title = "Slow mobility network", subtitle = "Pedestrian") + + labs(title = "Slow mobility network", + subtitle = "Pedestrian" + ) + coord_sf(datum = st_crs(28992)) ``` @@ -352,7 +357,8 @@ ggplot(data = lines_Delft_selection) + geom_sf(aes(linewidth = highway)) + scale_linewidth_manual(values = line_width) + labs(title = "Mobility network of Delft", - subtitle = "Roads & Cycleways - Line width varies") + + subtitle = "Roads & Cycleways - Line width varies" + ) + coord_sf(datum = st_crs(28992)) ``` @@ -405,16 +411,22 @@ levels(factor(lines_Delft$highway)) ``` ```{r} -# First, create a data frame with only those roads where bicycles are allowed +# First, create a data frame with only roads where bicycles +# are allowed lines_Delft_bicycle <- lines_Delft %>% filter(highway == "cycleway") -# Next, visualise using ggplot +# Next, visualise it using ggplot ggplot(data = lines_Delft) + geom_sf() + - geom_sf(data = lines_Delft_bicycle, aes(color = highway), linewidth = 1) + + geom_sf(data = lines_Delft_bicycle, + aes(color = highway), + linewidth = 1 + ) + scale_color_manual(values = "magenta") + - labs(title = "Mobility network in Delft", subtitle = "Roads dedicated to Bikes") + + labs(title = "Mobility network in Delft", + subtitle = "Roads dedicated to Bikes" + ) + coord_sf(datum = st_crs(28992)) ``` From f168602a36f8a8f0a1055d38637cd6698f4784c2 Mon Sep 17 00:00:00 2001 From: Manuel Garcia Date: Fri, 16 Feb 2024 20:33:39 +0100 Subject: [PATCH 11/11] shorten lines in codebloks for readability --- episodes/11-plot-multiple-shape-files.Rmd | 114 ++++++++++++++---- 
...12-handling-spatial-projection-and-crs.Rmd | 48 ++++++-- 2 files changed, 126 insertions(+), 36 deletions(-) diff --git a/episodes/11-plot-multiple-shape-files.Rmd b/episodes/11-plot-multiple-shape-files.Rmd index 06496fa0..7acb16bf 100644 --- a/episodes/11-plot-multiple-shape-files.Rmd +++ b/episodes/11-plot-multiple-shape-files.Rmd @@ -57,8 +57,14 @@ To begin, we will create a plot with the site boundary as the first layer. Then ```{r} ggplot() + - geom_sf(data = boundary_Delft, fill = "lightgrey", color = "lightgrey") + - geom_sf(data = lines_Delft_selection, aes(color = highway), size = 1) + + geom_sf(data = boundary_Delft, + fill = "lightgrey", + color = "lightgrey" + ) + + geom_sf(data = lines_Delft_selection, + aes(color = highway), + size = 1 + ) + geom_sf(data = point_Delft) + labs(title = "Mobility network of Delft") + coord_sf(datum = st_crs(28992)) @@ -69,23 +75,49 @@ Next, let’s build a custom legend using the functions `scale_color_manual()` a ```{r} leisure_colors <- rainbow(15) point_Delft$leisure <- factor(point_Delft$leisure) + ggplot() + - geom_sf(data = boundary_Delft, fill = "lightgrey", color = "lightgrey") + - geom_sf(data = lines_Delft_selection, aes(color = highway), size = 1) + - geom_sf(data = point_Delft, aes(fill = leisure), shape = 21) + - scale_color_manual(values = road_colors, name = "Road Type") + - scale_fill_manual(values = leisure_colors, name = "Lesiure Location") + + geom_sf(data = boundary_Delft, + fill = "lightgrey", + color = "lightgrey" + ) + + geom_sf(data = lines_Delft_selection, + aes(color = highway), + size = 1 + ) + + geom_sf(data = point_Delft, + aes(fill = leisure), + shape = 21) + + scale_color_manual(values = road_colors, + name = "Road Type" + ) + + scale_fill_manual(values = leisure_colors, + name = "Lesiure Location" + ) + labs(title = "Mobility network and leisure in Delft") + coord_sf(datum = st_crs(28992)) ``` ```{r} ggplot() + - geom_sf(data = boundary_Delft, fill = "lightgrey", color = "lightgrey") + - 
geom_sf(data = lines_Delft_selection, aes(color = highway), size = 1) + - geom_sf(data = point_Delft, aes(fill = leisure), shape = 22) + - scale_color_manual(values = road_colors, name = "Line Type") + - scale_fill_manual(values = leisure_colors, name = "Leisure Location") + + geom_sf(data = boundary_Delft, + fill = "lightgrey", + color = "lightgrey" + ) + + geom_sf(data = lines_Delft_selection, + aes(color = highway), + size = 1 + ) + + geom_sf(data = point_Delft, + aes(fill = leisure), + shape = 22 + ) + + scale_color_manual(values = road_colors, + name = "Line Type" + ) + + scale_fill_manual(values = leisure_colors, + name = "Leisure Location" + ) + labs(title = "Mobility network and leisure in Delft") + coord_sf(datum = st_crs(28992)) ``` @@ -118,26 +150,56 @@ blue_orange <- c("cornflowerblue", "darkorange") ```{r} ggplot() + - geom_sf(data = lines_Delft_selection, aes(color = highway)) + - geom_sf(data = leisure_locations_selection, aes(fill = leisure), - shape = 21) + - scale_color_manual(name = "Line Type", values = road_colors, - guide = guide_legend(override.aes = list(linetype = "solid", shape = NA))) + - scale_fill_manual(name = "Soil Type", values = blue_orange, - guide = guide_legend(override.aes = list(linetype = "blank", shape = 21, colour = NA))) + + geom_sf(data = lines_Delft_selection, + aes(color = highway) + ) + + geom_sf(data = leisure_locations_selection, + aes(fill = leisure), + shape = 21 + ) + + scale_color_manual(name = "Line Type", + values = road_colors, + guide = guide_legend(override.aes = list( + linetype = "solid", + shape = NA + )) + ) + + scale_fill_manual(name = "Soil Type", + values = blue_orange, + guide = guide_legend(override.aes = list( + linetype = "blank", + shape = 21, + colour = NA + )) + ) + labs(title = "Traffic and leisure") + coord_sf(datum = st_crs(28992)) ``` ```{r} ggplot() + - geom_sf(data = lines_Delft_selection, aes(color = highway), size = 1) + - geom_sf(data = leisure_locations_selection, aes(fill = leisure, 
shape = leisure), size = 2) + - scale_shape_manual(name = "Leisure Type", values = c(21, 22)) + - scale_color_manual(name = "Line Type", values = road_colors) + - scale_fill_manual(name = "Leisure Type", values = rainbow(15), - guide = guide_legend(override.aes = list(linetype = "blank", shape = c(21, 22), - color = "black"))) + + geom_sf(data = lines_Delft_selection, + aes(color = highway), + size = 1 + ) + + geom_sf(data = leisure_locations_selection, + aes(fill = leisure, shape = leisure), + size = 2 + ) + + scale_shape_manual(name = "Leisure Type", + values = c(21, 22) + ) + + scale_color_manual(name = "Line Type", + values = road_colors + ) + + scale_fill_manual(name = "Leisure Type", + values = rainbow(15), + guide = guide_legend(override.aes = list( + linetype = "blank", + shape = c(21, 22), + color = "black" + )) + ) + labs(title = "Road network and leisure") + coord_sf(datum = st_crs(28992)) ``` diff --git a/episodes/12-handling-spatial-projection-and-crs.Rmd b/episodes/12-handling-spatial-projection-and-crs.Rmd index 24811951..ff0d77b0 100644 --- a/episodes/12-handling-spatial-projection-and-crs.Rmd +++ b/episodes/12-handling-spatial-projection-and-crs.Rmd @@ -50,8 +50,13 @@ country_boundary_NL <- st_read("data/nl-boundary.shp") ```{r} ggplot() + - geom_sf(data = country_boundary_NL, color = "gray18", linewidth = 2) + - geom_sf(data = municipal_boundary_NL, color = "gray40") + + geom_sf(data = country_boundary_NL, + color = "gray18", + linewidth = 2 + ) + + geom_sf(data = municipal_boundary_NL, + color = "gray40" + ) + labs(title = "Map of Contiguous NL Municipal Boundaries") + coord_sf(datum = st_crs(28992)) ``` @@ -76,9 +81,17 @@ boundary_Delft <- st_transform(boundary_Delft, 28992) ```{r} ggplot() + - geom_sf(data = country_boundary_NL, linewidth = 2, color = "gray18") + - geom_sf(data = municipal_boundary_NL, color = "gray40") + - geom_sf(data = boundary_Delft, color = "purple", fill = "purple") + + geom_sf(data = country_boundary_NL, + linewidth = 2, 
+ color = "gray18" + ) + + geom_sf(data = municipal_boundary_NL, + color = "gray40" + ) + + geom_sf(data = boundary_Delft, + color = "purple", + fill = "purple" + ) + labs(title = "Map of Contiguous NL Municipal Boundaries") + coord_sf(datum = st_crs(28992)) ``` @@ -105,10 +118,23 @@ boundary_ZH <- municipal_boundary_NL %>% ```{r} ggplot() + - geom_sf(data = boundary_ZH, aes(color ="color"), show.legend = "line") + - scale_color_manual(name = "", labels = "Municipal Boundaries in South Holland", values = c("color" = "gray18")) + - geom_sf(data = boundary_Delft, aes(shape = "shape"), color = "purple", fill = "purple") + - scale_shape_manual(name = "", labels = "Municipality of Delft", values = c("shape" = 19)) + + geom_sf(data = boundary_ZH, + aes(color ="color"), + show.legend = "line" + ) + + scale_color_manual(name = "", + labels = "Municipal Boundaries in South Holland", + values = c("color" = "gray18") + ) + + geom_sf(data = boundary_Delft, + aes(shape = "shape"), + color = "purple", + fill = "purple" + ) + + scale_shape_manual(name = "", + labels = "Municipality of Delft", + values = c("shape" = 19) + ) + labs(title = "Delft location") + theme(legend.background = element_rect(color = NA)) + coord_sf(datum = st_crs(28992)) @@ -126,7 +152,9 @@ To save a file, use the `st_write()` function from the `sf` package. Although `s ```{r, eval=FALSE} st_write(leisure_locations_selection, - "data/leisure_locations_selection.shp", driver = "ESRI Shapefile") + "data/leisure_locations_selection.shp", + driver = "ESRI Shapefile" + ) ```